diff --git a/LICENSE.txt b/LICENSE.txt index 2e08754fea8..e1fbdc18607 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -359,7 +359,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. The binary distribution of this product bundles these dependencies under the following license: -re2j 1.0 +re2j 1.1 --------------------------------------------------------------------- (GO license) This is a work derived from Russ Cox's RE2 in Go, whose license diff --git a/NOTICE.txt b/NOTICE.txt index c41972b045f..0718909cb1b 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -8,19 +8,10 @@ following notices: * Copyright 2011 FuseSource Corp. http://fusesource.com The binary distribution of this product bundles binaries of -org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni), -which has the following notices: -* This product includes software developed by FuseSource Corp. - http://fusesource.com -* This product includes software developed at - Progress Software Corporation and/or its subsidiaries or affiliates. -* This product includes software developed by IBM Corporation and others. - -The binary distribution of this product bundles binaries of -AWS SDK for Java - Core 1.11.45, -AWS Java SDK for AWS KMS 1.11.45, -AWS Java SDK for Amazon S3 1.11.45, -AWS Java SDK for AWS STS 1.11.45, +AWS SDK for Java - Bundle 1.11.134, +AWS Java SDK for AWS KMS 1.11.134, +AWS Java SDK for Amazon S3 1.11.134, +AWS Java SDK for AWS STS 1.11.134, JMES Path Query library 1.0, which has the following notices: * This software includes third party software subject to the following @@ -303,15 +294,15 @@ which has the following notices: notice. The binary distribution of this product bundles binaries of -Jetty :: Http Utility 9.3.11., -Jetty :: IO Utility 9.3.11., -Jetty :: Security 9.3.11., -Jetty :: Server Core 9.3.11., -Jetty :: Servlet Handling 9.3.11., -Jetty :: Utilities 9.3.11., +Jetty :: Http Utility 9.3.19., +Jetty :: IO Utility 9.3.19., +Jetty :: Security 9.3.19., +Jetty :: Server Core 9.3.19., +Jetty :: Servlet Handling 9.3.19., +Jetty :: Utilities 9.3.19., Jetty :: Utilities :: Ajax, -Jetty :: Webapp Application Support 9.3.11., -Jetty :: XML utilities 9.3.11., +Jetty :: Webapp Application Support 9.3.19., +Jetty :: XML utilities 9.3.19., which has the following notices: * ============================================================== Jetty Web Container @@ -481,15 +472,15 @@ which has the following notices: Copyright (C) 1999-2012, QOS.ch. All rights reserved. The binary distribution of this product bundles binaries of -Apache HBase - Annotations 1.2.4, -Apache HBase - Client 1.2.4, -Apache HBase - Common 1.2.4, -Apache HBase - Hadoop Compatibility 1.2.4, -Apache HBase - Hadoop Two Compatibility 1.2.4, -Apache HBase - Prefix Tree 1.2.4, -Apache HBase - Procedure 1.2.4, -Apache HBase - Protocol 1.2.4, -Apache HBase - Server 1.2.4, +Apache HBase - Annotations 1.2.6, +Apache HBase - Client 1.2.6, +Apache HBase - Common 1.2.6, +Apache HBase - Hadoop Compatibility 1.2.6, +Apache HBase - Hadoop Two Compatibility 1.2.6, +Apache HBase - Prefix Tree 1.2.6, +Apache HBase - Procedure 1.2.6, +Apache HBase - Protocol 1.2.6, +Apache HBase - Server 1.2.6, which has the following notices: * Apache HBase Copyright 2007-2015 The Apache Software Foundation @@ -576,3 +567,17 @@ The binary distribution of this product bundles binaries of software.amazon.ion:ion-java 1.0.1, which has the following notices: * Amazon Ion Java Copyright 2007-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ +The binary distribution of this product bundles binaries of +joda-time:joda-time:2.9.9 +which has the following notices: + * ============================================================================= + = NOTICE file corresponding to section 4d of the Apache License Version 2.0 = + ============================================================================= + This product includes software developed by + Joda.org (http://www.joda.org/). + +The binary distribution of this product bundles binaries of +Ehcache 3.3.1, +which has the following notices: + * Ehcache V3 Copyright 2014-2016 Terracotta, Inc. diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml index a50ffedd8a7..edbddc9ff1d 100644 --- a/hadoop-client-modules/hadoop-client-api/pom.xml +++ b/hadoop-client-modules/hadoop-client-api/pom.xml @@ -182,6 +182,21 @@ io/serializations + + + javax/el/ + ${shaded.dependency.prefix}.javax.el. + + **/pom.xml + + + + javax/cache/ + ${shaded.dependency.prefix}.javax.cache. + + **/pom.xml + + javax/servlet/ ${shaded.dependency.prefix}.javax.servlet. @@ -189,6 +204,13 @@ **/pom.xml + + javax/ws/ + ${shaded.dependency.prefix}.javax.ws. + + **/pom.xml + + net/ ${shaded.dependency.prefix}.net. @@ -199,6 +221,11 @@ net/topology/**/* + + + okio/ + ${shaded.dependency.prefix}.okio. + diff --git a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml index 75f4d198a31..c6a8552765b 100644 --- a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml @@ -25,7 +25,13 @@ 3.1.0-SNAPSHOT pom - Enforces our invariants for the api and runtime client modules. + + Enforces our invariants for the api and runtime client modules. + E.g. that modules have a specific set of transitive dependencies + and shaded artifacts only contain classes that are in particular + packages. Does the enforcement through the maven-enforcer-plugin + and an integration test. + Apache Hadoop Client Packaging Invariants @@ -82,6 +88,8 @@ commons-logging:commons-logging log4j:log4j + + com.google.code.findbugs:jsr305 @@ -97,7 +105,6 @@ - + + put-client-artifacts-in-a-property + pre-integration-test + + build-classpath + + + true + hadoop-client-artifacts + + + + + + + org.codehaus.mojo + exec-maven-plugin + + + check-jar-contents + integration-test + + exec + + + ${shell-executable} + ${project.build.testOutputDirectory} + false + + ensure-jars-have-correct-contents.sh + ${hadoop-client-artifacts} + + + + + diff --git a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh new file mode 100644 index 00000000000..84efe7e08ce --- /dev/null +++ b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Usage: $0 [/path/to/some/example.jar:/path/to/another/example/created.jar] +# +# accepts a single command line argument with a colon separated list of +# paths to jars to check. Iterates through each such passed jar and checks +# all the contained paths to make sure they follow the below constructed +# safe list. + +# we have to allow the directories that lead to the org/apache/hadoop dir +allowed_expr="(^org/$|^org/apache/$" +# We allow the following things to exist in our client artifacts: +# * classes in packages that start with org.apache.hadoop, which by +# convention should be in a path that looks like org/apache/hadoop +allowed_expr+="|^org/apache/hadoop/" +# * whatever in the "META-INF" directory +allowed_expr+="|^META-INF/" +# * whatever under the "webapps" directory; for things shipped by yarn +allowed_expr+="|^webapps/" +# * Hadoop's default configuration files, which have the form +# "_module_-default.xml" +allowed_expr+="|^[^-]*-default.xml$" +# * Hadoop's versioning properties files, which have the form +# "_module_-version-info.properties" +allowed_expr+="|^[^-]*-version-info.properties$" +# * Hadoop's application classloader properties file. +allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +# public suffix list used by httpcomponents +allowed_expr+="|^mozilla/$" +allowed_expr+="|^mozilla/public-suffix-list.txt$" +# Comes from commons-configuration, not sure if relocatable. +allowed_expr+="|^properties.dtd$" +allowed_expr+="|^PropertyList-1.0.dtd$" +# Comes from Ehcache, not relocatable at top level due to limitation +# of shade plugin AFAICT +allowed_expr+="|^ehcache-core.xsd$" +allowed_expr+="|^ehcache-107ext.xsd$" +# Comes from kerby's kerb-simplekdc, not relocatable since at top level +allowed_expr+="|^krb5-template.conf$" +allowed_expr+="|^krb5_udp-template.conf$" +# Jetty uses this style sheet for directory listings. TODO ensure our +# internal use of jetty disallows directory listings and remove this. +allowed_expr+="|^jetty-dir.css$" + +allowed_expr+=")" +declare -i bad_artifacts=0 +declare -a bad_contents +IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1") +for artifact in "${artifact_list[@]}"; do + bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}")) + if [ ${#bad_contents[@]} -gt 0 ]; then + echo "[ERROR] Found artifact with unexpected contents: '${artifact}'" + echo " Please check the following and either correct the build or update" + echo " the allowed list with reasoning." 
+ echo "" + for bad_line in "${bad_contents[@]}"; do + echo " ${bad_line}" + done + bad_artifacts=${bad_artifacts}+1 + else + echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'" + fi +done + +if [ "${bad_artifacts}" -gt 0 ]; then + exit 1 +fi diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml index d0d071279bf..691b5455994 100644 --- a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml @@ -25,7 +25,13 @@ 3.1.0-SNAPSHOT pom - Enforces our invariants for the testing client modules. + + Enforces our invariants for the test client modules. + E.g. that modules have a specific set of transitive dependencies + and shaded artifacts only contain classes that are in particular + packages. Does the enforcement through the maven-enforcer-plugin + and an integration test. + Apache Hadoop Client Packaging Invariants for Test @@ -90,6 +96,8 @@ junit:junit org.hamcrest:hamcrest-core + + com.google.code.findbugs:jsr305 @@ -105,7 +113,6 @@ - + + org.apache.maven.plugins + maven-dependency-plugin + + + put-client-artifacts-in-a-property + pre-integration-test + + build-classpath + + + + hadoop-client-api,hadoop-client-runtime + true + hadoop-client-artifacts + + + + + + + org.codehaus.mojo + exec-maven-plugin + + + check-jar-contents + integration-test + + exec + + + ${shell-executable} + ${project.build.testOutputDirectory} + false + + ensure-jars-have-correct-contents.sh + ${hadoop-client-artifacts} + + + + + diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh new file mode 100644 index 00000000000..fb9f4f920b6 --- /dev/null +++ b/hadoop-client-modules/hadoop-client-check-test-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Usage: $0 [/path/to/some/example.jar:/path/to/another/example/created.jar] +# +# accepts a single command line argument with a colon separated list of +# paths to jars to check. Iterates through each such passed jar and checks +# all the contained paths to make sure they follow the below constructed +# safe list. 
+ +# we have to allow the directories that lead to the org/apache/hadoop dir +allowed_expr="(^org/$|^org/apache/$" +# We allow the following things to exist in our client artifacts: +# * classes in packages that start with org.apache.hadoop, which by +# convention should be in a path that looks like org/apache/hadoop +allowed_expr+="|^org/apache/hadoop/" +# * whatever in the "META-INF" directory +allowed_expr+="|^META-INF/" +# * whatever under the "webapps" directory; for minicluster UIs +allowed_expr+="|^webapps/" +# * Hadoop's default configuration files, which have the form +# "_module_-default.xml" +allowed_expr+="|^[^-]*-default.xml$" +# * Hadoop's versioning properties files, which have the form +# "_module_-version-info.properties" +allowed_expr+="|^[^-]*-version-info.properties$" +# * Hadoop's application classloader properties file. +allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +# * Used by JavaSandboxLinuxContainerRuntime as a default, loaded +# from root, so can't relocate. :( +allowed_expr+="|^java.policy$" + + +allowed_expr+=")" +declare -i bad_artifacts=0 +declare -a bad_contents +IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1") +for artifact in "${artifact_list[@]}"; do + bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}")) + if [ ${#bad_contents[@]} -gt 0 ]; then + echo "[ERROR] Found artifact with unexpected contents: '${artifact}'" + echo " Please check the following and either correct the build or update" + echo " the allowed list with reasoning." + echo "" + for bad_line in "${bad_contents[@]}"; do + echo " ${bad_line}" + done + bad_artifacts=${bad_artifacts}+1 + else + echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'" + fi +done + +if [ "${bad_artifacts}" -gt 0 ]; then + exit 1 +fi diff --git a/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseMiniCluster.java b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseMiniCluster.java index 5fcbe13c977..6022fbc688d 100644 --- a/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseMiniCluster.java +++ b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseMiniCluster.java @@ -77,7 +77,9 @@ public void clusterUp() throws IOException { @After public void clusterDown() { - cluster.close(); + if (cluster != null) { + cluster.close(); + } } @Test diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index ba84e87802c..00f2d254035 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -75,6 +75,9 @@ provided --> + org.apache.hadoop hadoop-minicluster @@ -282,6 +285,12 @@ + + + org.mockito + mockito-all + true + @@ -339,40 +348,12 @@ jersey-servlet true - - org.eclipse.jdt - core - true - net.sf.kosmosfs kfs true - - net.java.dev.jets3t - jets3t - true - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - com.jcraft jsch @@ -586,6 +567,7 @@ org.slf4j:slf4j-api commons-logging:commons-logging junit:junit + com.google.code.findbugs:jsr305 org.apache.hadoop:hadoop-yarn-server-timelineservice log4j:log4j @@ -635,6 +617,53 @@ testshell/* + + + org.mockito:mockito-all + + asm-license.txt + cglib-license.txt + 
hamcrest-license.txt + objenesis-license.txt + org/hamcrest/**/*.class + org/hamcrest/*.class + + + + + org.glassfish.grizzly:grizzly-http-servlet + + catalog.cat + javaee_5.xsd + javaee_6.xsd + javaee_web_services_client_1_2.xsd + javaee_web_services_client_1_3.xsd + jsp_2_1.xsd + jsp_2_2.xsd + web-app_2_5.xsd + web-app_3_0.xsd + web-common_3_0.xsd + xml.xsd + + + + + org.eclipse.jetty:* + + about.html + + + + org.apache.hadoop:* + + + log4j.properties + container-log4j.properties + + capacity-scheduler.xml + krb5.conf + + @@ -717,6 +746,7 @@ **/pom.xml + javax/el/ ${shaded.dependency.prefix}.javax.el. @@ -724,6 +754,13 @@ **/pom.xml + + javax/cache/ + ${shaded.dependency.prefix}.javax.cache. + + **/pom.xml + + javax/inject/ ${shaded.dependency.prefix}.javax.inject. @@ -738,6 +775,13 @@ **/pom.xml + + javax/ws/ + ${shaded.dependency.prefix}.javax.ws. + + **/pom.xml + + jersey/ ${shaded.dependency.prefix}.jersey. @@ -755,6 +799,11 @@ net/topology/**/* + + + okio/ + ${shaded.dependency.prefix}.okio. + diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml index 6879a23f08c..7ed5ba7c071 100644 --- a/hadoop-client-modules/hadoop-client-runtime/pom.xml +++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml @@ -94,6 +94,11 @@ commons-logging runtime + + com.google.code.findbugs + jsr305 + runtime + @@ -149,6 +154,9 @@ commons-logging:commons-logging log4j:log4j + + + com.google.code.findbugs:jsr305 @@ -181,6 +189,28 @@ META-INF/services/javax.* + + + org.apache.commons:commons-math3 + + assets/org/apache/commons/math3/**/* + + + + + org.eclipse.jetty:* + + about.html + + + + + org.apache.kerby:kerb-util + + keytab.txt + ccache.txt + + @@ -245,6 +275,7 @@ io/serializations + javax/el/ ${shaded.dependency.prefix}.javax.el. @@ -252,6 +283,13 @@ **/pom.xml + + javax/cache/ + ${shaded.dependency.prefix}.javax.cache. + + **/pom.xml + + javax/servlet/ ${shaded.dependency.prefix}.javax.servlet. @@ -259,6 +297,13 @@ **/pom.xml + + javax/ws/ + ${shaded.dependency.prefix}.javax.ws. + + **/pom.xml + + net/ ${shaded.dependency.prefix}.net. @@ -269,6 +314,11 @@ net/topology/**/* + + + okio/ + ${shaded.dependency.prefix}.okio. 
+ - - org.slf4j - slf4j-log4j12 - - - org.apache.zookeeper - zookeeper - - - - org.apache.hadoop hadoop-yarn-api @@ -226,6 +177,37 @@ + + org.apache.hadoop + hadoop-yarn-client + compile + + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-annotations + + + com.google.guava + guava + + + commons-cli + commons-cli + + + log4j + log4j + + + + org.apache.hadoop hadoop-mapreduce-client-core diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/JWTRedirectAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/JWTRedirectAuthenticationHandler.java index 61f5b9e8943..884398cb799 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/JWTRedirectAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/JWTRedirectAuthenticationHandler.java @@ -28,6 +28,7 @@ import java.security.interfaces.RSAPublicKey; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.apache.hadoop.security.authentication.util.CertificateUtil; import org.slf4j.Logger; @@ -216,7 +217,8 @@ protected String getJWTFromCookie(HttpServletRequest req) { * @param request for getting the original request URL * @return url to use as login url for redirect */ - protected String constructLoginURL(HttpServletRequest request) { + @VisibleForTesting + String constructLoginURL(HttpServletRequest request) { String delimiter = "?"; if (authenticationProviderUrl.contains("?")) { delimiter = "&"; diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestJWTRedirectAuthentictionHandler.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestJWTRedirectAuthenticationHandler.java similarity index 95% rename from hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestJWTRedirectAuthentictionHandler.java rename to hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestJWTRedirectAuthenticationHandler.java index 97a8a9d2c5d..5a2db9ba6fd 100644 --- a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestJWTRedirectAuthentictionHandler.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestJWTRedirectAuthenticationHandler.java @@ -47,7 +47,7 @@ import com.nimbusds.jwt.SignedJWT; import com.nimbusds.jose.crypto.RSASSASigner; -public class TestJWTRedirectAuthentictionHandler extends +public class TestJWTRedirectAuthenticationHandler extends KerberosSecurityTestcase { private static final String SERVICE_URL = "https://localhost:8888/resource"; private static final String REDIRECT_LOCATION = @@ -392,7 +392,7 @@ public void testOrigURLWithQueryString() throws Exception { new StringBuffer(SERVICE_URL)); Mockito.when(request.getQueryString()).thenReturn("name=value"); - String loginURL = ((TestJWTRedirectAuthenticationHandler)handler).testConstructLoginURL(request); + String loginURL = handler.constructLoginURL(request); Assert.assertNotNull("loginURL should not be null.", loginURL); Assert.assertEquals("https://localhost:8443/authserver?originalUrl=" + SERVICE_URL + "?name=value", loginURL); } @@ 
-409,7 +409,7 @@ public void testOrigURLNoQueryString() throws Exception { new StringBuffer(SERVICE_URL)); Mockito.when(request.getQueryString()).thenReturn(null); - String loginURL = ((TestJWTRedirectAuthenticationHandler)handler).testConstructLoginURL(request); + String loginURL = handler.constructLoginURL(request); Assert.assertNotNull("LoginURL should not be null.", loginURL); Assert.assertEquals("https://localhost:8443/authserver?originalUrl=" + SERVICE_URL, loginURL); } @@ -425,7 +425,7 @@ public void setup() throws Exception, NoSuchAlgorithmException { publicKey = (RSAPublicKey) kp.getPublic(); privateKey = (RSAPrivateKey) kp.getPrivate(); - handler = new TestJWTRedirectAuthenticationHandler(); + handler = new JWTRedirectAuthenticationHandler(); } protected void setupKerberosRequirements() throws Exception { @@ -453,15 +453,16 @@ protected Properties getProperties() { protected SignedJWT getJWT(String sub, Date expires, RSAPrivateKey privateKey) throws Exception { - JWTClaimsSet claimsSet = new JWTClaimsSet(); - claimsSet.setSubject(sub); - claimsSet.setIssueTime(new Date(new Date().getTime())); - claimsSet.setIssuer("https://c2id.com"); - claimsSet.setCustomClaim("scope", "openid"); - claimsSet.setExpirationTime(expires); + JWTClaimsSet claimsSet = new JWTClaimsSet.Builder() + .subject(sub) + .issueTime(new Date(new Date().getTime())) + .issuer("https://c2id.com") + .claim("scope", "openid") + .audience("bar") + .expirationTime(expires) + .build(); List aud = new ArrayList(); aud.add("bar"); - claimsSet.setAudience("bar"); JWSHeader header = new JWSHeader.Builder(JWSAlgorithm.RS256).build(); @@ -472,10 +473,4 @@ protected SignedJWT getJWT(String sub, Date expires, RSAPrivateKey privateKey) return signedJWT; } - - class TestJWTRedirectAuthenticationHandler extends JWTRedirectAuthenticationHandler { - public String testConstructLoginURL(HttpServletRequest req) { - return constructLoginURL(req); - } - }; } diff --git a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties index 70e52997a1b..4e6e46f9a56 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties +++ b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties @@ -182,8 +182,6 @@ log4j.appender.DNMETRICSRFA.MaxFileSize=64MB #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG -# Jets3t library -log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR # AWS SDK & S3A FileSystem log4j.logger.com.amazonaws=ERROR diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java index 693c7853edf..3ee3bd756e2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java @@ -554,6 +554,16 @@ public EncryptedKeyVersion reencryptEncryptedKey(EncryptedKeyVersion ekv) return getExtension().reencryptEncryptedKey(ekv); } + /** + * Calls {@link CryptoExtension#drain(String)} for the given key name on the + * underlying {@link CryptoExtension}. 
+ * + * @param keyName + */ + public void drain(String keyName) { + getExtension().drain(keyName); + } + /** * Batched version of {@link #reencryptEncryptedKey(EncryptedKeyVersion)}. *

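The drain() pass-through added above simply delegates to the underlying CryptoExtension so callers can discard cached encrypted key versions, typically right after rolling a key. Below is a minimal, illustrative sketch (not part of the patch) of that usage; it assumes a key provider is configured under hadoop.security.key.provider.path and that a key named "mykey" already exists.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.key.KeyProvider;
import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
import org.apache.hadoop.crypto.key.KeyProviderFactory;

public class DrainAfterRollExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumes at least one provider (e.g. a KMS URI) is configured in
    // hadoop.security.key.provider.path; "mykey" is a hypothetical key name.
    KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0);
    KeyProviderCryptoExtension kpce =
        KeyProviderCryptoExtension.createKeyProviderCryptoExtension(provider);
    // Roll the key, then drop any cached EEKs generated under the old
    // key version so freshly generated EEKs use the new material.
    kpce.rollNewVersion("mykey");
    kpce.drain("mykey");
  }
}
```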
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java index 9bef32c06d6..b8823352d08 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java @@ -247,8 +247,9 @@ public static class Factory extends KeyProviderFactory { * - HOSTNAME = string * - PORT = integer * - * This will always create a {@link LoadBalancingKMSClientProvider} - * if the uri is correct. + * If multiple hosts are provider, the Factory will create a + * {@link LoadBalancingKMSClientProvider} that round-robins requests + * across the provided list of hosts. */ @Override public KeyProvider createProvider(URI providerUri, Configuration conf) @@ -275,26 +276,30 @@ public KeyProvider createProvider(URI providerUri, Configuration conf) } hostsPart = t[0]; } - return createProvider(conf, origUrl, port, hostsPart); + return createProvider(providerUri, conf, origUrl, port, hostsPart); } return null; } - private KeyProvider createProvider(Configuration conf, + private KeyProvider createProvider(URI providerUri, Configuration conf, URL origUrl, int port, String hostsPart) throws IOException { String[] hosts = hostsPart.split(";"); - KMSClientProvider[] providers = new KMSClientProvider[hosts.length]; - for (int i = 0; i < hosts.length; i++) { - try { - providers[i] = - new KMSClientProvider( - new URI("kms", origUrl.getProtocol(), hosts[i], port, - origUrl.getPath(), null, null), conf); - } catch (URISyntaxException e) { - throw new IOException("Could not instantiate KMSProvider.", e); + if (hosts.length == 1) { + return new KMSClientProvider(providerUri, conf); + } else { + KMSClientProvider[] providers = new KMSClientProvider[hosts.length]; + for (int i = 0; i < hosts.length; i++) { + try { + providers[i] = + new KMSClientProvider( + new URI("kms", origUrl.getProtocol(), hosts[i], port, + origUrl.getPath(), null, null), conf); + } catch (URISyntaxException e) { + throw new IOException("Could not instantiate KMSProvider..", e); + } } + return new LoadBalancingKMSClientProvider(providers, conf); } - return new LoadBalancingKMSClientProvider(providers, conf); } } @@ -1023,11 +1028,7 @@ public Token run() throws Exception { } catch (InterruptedException e) { Thread.currentThread().interrupt(); } catch (Exception e) { - if (e instanceof IOException) { - throw (IOException) e; - } else { - throw new IOException(e); - } + throw new IOException(e); } } return tokens; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java index de4d25a8e3c..71d32ff5527 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java @@ -19,7 +19,6 @@ package org.apache.hadoop.crypto.key.kms; import java.io.IOException; -import java.io.InterruptedIOException; import java.security.GeneralSecurityException; import java.security.NoSuchAlgorithmException; import java.util.Arrays; @@ -32,13 +31,9 @@ import 
org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.CryptoExtension; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; import org.apache.hadoop.crypto.key.KeyProviderDelegationTokenExtension; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.io.retry.RetryPolicies; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryPolicy.RetryAction; -import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,8 +69,6 @@ public WrapperException(Throwable cause) { private final KMSClientProvider[] providers; private final AtomicInteger currentIdx; - private RetryPolicy retryPolicy = null; - public LoadBalancingKMSClientProvider(KMSClientProvider[] providers, Configuration conf) { this(shuffle(providers), Time.monotonicNow(), conf); @@ -87,79 +80,24 @@ public LoadBalancingKMSClientProvider(KMSClientProvider[] providers, super(conf); this.providers = providers; this.currentIdx = new AtomicInteger((int)(seed % providers.length)); - int maxNumRetries = conf.getInt(CommonConfigurationKeysPublic. - KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, providers.length); - int sleepBaseMillis = conf.getInt(CommonConfigurationKeysPublic. - KMS_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY, - CommonConfigurationKeysPublic. - KMS_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT); - int sleepMaxMillis = conf.getInt(CommonConfigurationKeysPublic. - KMS_CLIENT_FAILOVER_SLEEP_MAX_MILLIS_KEY, - CommonConfigurationKeysPublic. - KMS_CLIENT_FAILOVER_SLEEP_MAX_MILLIS_DEFAULT); - Preconditions.checkState(maxNumRetries >= 0); - Preconditions.checkState(sleepBaseMillis >= 0); - Preconditions.checkState(sleepMaxMillis >= 0); - this.retryPolicy = RetryPolicies.failoverOnNetworkException( - RetryPolicies.TRY_ONCE_THEN_FAIL, maxNumRetries, 0, sleepBaseMillis, - sleepMaxMillis); } @VisibleForTesting - public KMSClientProvider[] getProviders() { + KMSClientProvider[] getProviders() { return providers; } private T doOp(ProviderCallable op, int currPos) throws IOException { - if (providers.length == 0) { - throw new IOException("No providers configured !"); - } IOException ex = null; - int numFailovers = 0; - for (int i = 0;; i++, numFailovers++) { + for (int i = 0; i < providers.length; i++) { KMSClientProvider provider = providers[(currPos + i) % providers.length]; try { return op.call(provider); - } catch (AccessControlException ace) { - // No need to retry on AccessControlException - // and AuthorizationException. - // This assumes all the servers are configured with identical - // permissions and identical key acls. - throw ace; } catch (IOException ioe) { - LOG.warn("KMS provider at [{}] threw an IOException: ", - provider.getKMSUrl(), ioe); + LOG.warn("KMS provider at [{}] threw an IOException!! 
{}", + provider.getKMSUrl(), StringUtils.stringifyException(ioe)); ex = ioe; - - RetryAction action = null; - try { - action = retryPolicy.shouldRetry(ioe, 0, numFailovers, false); - } catch (Exception e) { - if (e instanceof IOException) { - throw (IOException)e; - } - throw new IOException(e); - } - if (action.action == RetryAction.RetryDecision.FAIL) { - LOG.warn("Aborting since the Request has failed with all KMS" - + " providers(depending on {}={} setting and numProviders={})" - + " in the group OR the exception is not recoverable", - CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, - getConf().getInt( - CommonConfigurationKeysPublic. - KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, providers.length), - providers.length); - throw ex; - } - if (((numFailovers + 1) % providers.length) == 0) { - // Sleep only after we try all the providers for every cycle. - try { - Thread.sleep(action.delayMillis); - } catch (InterruptedException e) { - throw new InterruptedIOException("Thread Interrupted"); - } - } } catch (Exception e) { if (e instanceof RuntimeException) { throw (RuntimeException)e; @@ -168,6 +106,12 @@ private T doOp(ProviderCallable op, int currPos) } } } + if (ex != null) { + LOG.warn("Aborting since the Request has failed with all KMS" + + " providers in the group. !!"); + throw ex; + } + throw new IOException("No providers configured !!"); } private int nextIdx() { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java index df14ee8762b..cf484cae754 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -928,6 +928,11 @@ public FileStatus next() { * The specification of this method matches that of * {@link FileContext#listLocatedStatus(Path)} except that Path f * must be for this file system. + * + * In HDFS implementation, the BlockLocation of returned LocatedFileStatus + * will have different formats for replicated and erasure coded file. Please + * refer to {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} + * for more details. */ public RemoteIterator listLocatedStatus(final Path f) throws AccessControlException, FileNotFoundException, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java index 591febf0fd4..4dae2334d59 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java @@ -28,6 +28,34 @@ * Represents the network location of a block, information about the hosts * that contain block replicas, and other block metadata (E.g. the file * offset associated with the block, length, whether it is corrupt, etc). + * + * For a single BlockLocation, it will have different meanings for replicated + * and erasure coded files. + * + * If the file is 3-replicated, offset and length of a BlockLocation represent + * the absolute value in the file and the hosts are the 3 datanodes that + * holding the replicas. Here is an example: + *

+ * BlockLocation(offset: 0, length: BLOCK_SIZE,
+ *   hosts: {"host1:9866", "host2:9866, host3:9866"})
+ * 
+ *
+ * And if the file is erasure-coded, each BlockLocation represents a logical
+ * block group. Value offset is the offset of a block group in the file and
+ * value length is the total length of a block group. Hosts of a BlockLocation
+ * are the datanodes that hold all the data blocks and parity blocks of a
+ * block group.
+ * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ * A BlockLocation example will be like:
+ *
+ * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+ *   "host2:9866","host3:9866","host4:9866","host5:9866"})
+ * 
+ * + * Please refer to + * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or + * {@link FileContext#getFileBlockLocations(Path, long, long)} + * for more examples. */ @InterfaceAudience.Public @InterfaceStability.Stable diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index 4fda2b83320..b5f355a8138 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -721,35 +721,6 @@ public class CommonConfigurationKeysPublic { /** Default value for KMS_CLIENT_ENC_KEY_CACHE_EXPIRY (12 hrs)*/ public static final int KMS_CLIENT_ENC_KEY_CACHE_EXPIRY_DEFAULT = 43200000; - /** - * @see - * - * core-default.xml - */ - /** Default value is the number of providers specified. */ - public static final String KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY = - "hadoop.security.kms.client.failover.max.retries"; - - /** - * @see - * - * core-default.xml - */ - public static final String KMS_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY = - "hadoop.security.kms.client.failover.sleep.base.millis"; - /** Default value is 100 ms. */ - public static final int KMS_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT = 100; - - /** - * @see - * - * core-default.xml - */ - public static final String KMS_CLIENT_FAILOVER_SLEEP_MAX_MILLIS_KEY = - "hadoop.security.kms.client.failover.sleep.max.millis"; - /** Default value is 2 secs. */ - public static final int KMS_CLIENT_FAILOVER_SLEEP_MAX_MILLIS_DEFAULT = 2000; - /** * @see * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java index 640db592fb1..a80279db525 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java @@ -230,8 +230,8 @@ public void unbuffer() { try { ((CanUnbuffer)in).unbuffer(); } catch (ClassCastException e) { - throw new UnsupportedOperationException("this stream does not " + - "support unbuffering."); + throw new UnsupportedOperationException("this stream " + + in.getClass().getName() + " does not " + "support unbuffering."); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java index 21733b3e4c2..a3cc550b487 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java @@ -1293,7 +1293,36 @@ public Path next(final AbstractFileSystem fs, final Path p) * * This call is most helpful with DFS, where it returns * hostnames of machines that contain the given file. - * + * + * In HDFS, if file is three-replicated, the returned array contains + * elements like: + *
+   * BlockLocation(offset: 0, length: BLOCK_SIZE,
+   *   hosts: {"host1:9866", "host2:9866, host3:9866"})
+   * BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
+   *   hosts: {"host2:9866", "host3:9866, host4:9866"})
+   * 
+   *
+   * And if a file is erasure-coded, the returned BlockLocations are logical
+   * block groups.
+   *
+   * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+   * 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+   * there will be one BlockLocation returned, with 0 offset, actual file size
+   * and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+   * 2. If the file size is less than one group size but greater than one
+   * stripe size, then there will be one BlockLocation returned, with 0 offset,
+   * actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+   * the actual blocks.
+   * 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+   * for example, then the result will be like:
+   *
+   * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+   *   "host2:9866","host3:9866","host4:9866","host5:9866"})
+   * BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
+   *   "host4:9866", "host5:9866"})
+   * 
+ * * @param f - get blocklocations of this file * @param start position (byte offset) * @param len (in bytes) @@ -1527,7 +1556,7 @@ public RemoteIterator next(final AbstractFileSystem fs, * Return the file's status and block locations If the path is a file. * * If a returned status is a file, it contains the file's block locations. - * + * * @param f is the path * * @return an iterator that traverses statuses of the files/directories diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index fc7b9b2f508..d43e41d0059 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -799,7 +799,36 @@ protected void checkPath(Path path) { * The default implementation returns an array containing one element: *
    * BlockLocation( { "localhost:9866" },  { "localhost" }, 0, file.getLen())
-   *
+   *
+   *
+   * In HDFS, if file is three-replicated, the returned array contains
+   * elements like:
+   *
+   * BlockLocation(offset: 0, length: BLOCK_SIZE,
+   *   hosts: {"host1:9866", "host2:9866, host3:9866"})
+   * BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
+   *   hosts: {"host2:9866", "host3:9866, host4:9866"})
+   * 
+   *
+   * And if a file is erasure-coded, the returned BlockLocations are logical
+   * block groups.
+   *
+   * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+   * 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+   * there will be one BlockLocation returned, with 0 offset, actual file size
+   * and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+   * 2. If the file size is less than one group size but greater than one
+   * stripe size, then there will be one BlockLocation returned, with 0 offset,
+   * actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+   * the actual blocks.
+   * 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+   * for example, then the result will be like:
+   *
+   * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+   *   "host2:9866","host3:9866","host4:9866","host5:9866"})
+   * BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
+   *   "host4:9866", "host5:9866"})
+   * 
* * @param file FilesStatus to get data from * @param start offset into the given file @@ -2115,6 +2144,7 @@ public RemoteIterator listStatusIterator(final Path p) * List the statuses and block locations of the files in the given path. * Does not guarantee to return the iterator that traverses statuses * of the files in a sorted order. + * *
    * If the path is a directory,
    *   if recursive is false, returns files in the directory;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
index dbb751dc44e..29e19989edd 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
@@ -123,6 +123,13 @@ public LocatedFileStatus(long length, boolean isdir,
 
   /**
    * Get the file's block locations
+   *
+   * In HDFS, the returned BlockLocation will have different formats for
+   * replicated and erasure coded files.
+   * Please refer to
+   * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+   * for more details.
+   *
    * @return the file's block locations
    */
   public BlockLocation[] getBlockLocations() {
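To make the replicated versus erasure-coded semantics documented in these javadoc changes concrete, here is a small self-contained sketch (not part of the patch) that prints each BlockLocation of a file. The path is hypothetical and the default filesystem is assumed to be HDFS.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ShowBlockLocations {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path file = new Path("/tmp/example.dat"); // hypothetical path
    try (FileSystem fs = FileSystem.get(conf)) {
      FileStatus status = fs.getFileStatus(file);
      // For a replicated file each entry is one block; for an erasure-coded
      // file each entry is one block group, so the length can span several
      // blocks and the hosts include both data and parity datanodes.
      BlockLocation[] locations =
          fs.getFileBlockLocations(status, 0, status.getLen());
      for (BlockLocation loc : locations) {
        System.out.println("offset=" + loc.getOffset()
            + " length=" + loc.getLength()
            + " hosts=" + String.join(",", loc.getHosts()));
      }
    }
  }
}
```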
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
index a450f664b20..fa447d8d469 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
@@ -27,6 +27,7 @@
 import java.io.PrintStream;
 import java.net.BindException;
 import java.net.InetSocketAddress;
+import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URL;
 import java.util.ArrayList;
@@ -993,14 +994,31 @@ public WebAppContext getWebAppContext(){
    * Get the pathname to the webapps files.
    * @param appName eg "secondary" or "datanode"
    * @return the pathname as a URL
-   * @throws FileNotFoundException if 'webapps' directory cannot be found on CLASSPATH.
+   * @throws FileNotFoundException if 'webapps' directory cannot be found
+   *   on CLASSPATH or in the development location.
    */
   protected String getWebAppsPath(String appName) throws FileNotFoundException {
-    URL url = getClass().getClassLoader().getResource("webapps/" + appName);
-    if (url == null)
-      throw new FileNotFoundException("webapps/" + appName
-          + " not found in CLASSPATH");
-    String urlString = url.toString();
+    URL resourceUrl = null;
+    File webResourceDevLocation = new File("src/main/webapps", appName);
+    if (webResourceDevLocation.exists()) {
+      LOG.info("Web server is in development mode. Resources "
+          + "will be read from the source tree.");
+      try {
+        resourceUrl = webResourceDevLocation.getParentFile().toURI().toURL();
+      } catch (MalformedURLException e) {
+        throw new FileNotFoundException("Mailformed URL while finding the "
+            + "web resource dir:" + e.getMessage());
+      }
+    } else {
+      resourceUrl =
+          getClass().getClassLoader().getResource("webapps/" + appName);
+
+      if (resourceUrl == null) {
+        throw new FileNotFoundException("webapps/" + appName +
+            " not found in CLASSPATH");
+      }
+    }
+    String urlString = resourceUrl.toString();
     return urlString.substring(0, urlString.lastIndexOf('/'));
   }
 
@@ -1200,6 +1218,7 @@ private void bindForPortRange(ServerConnector listener, int startPort)
    * @throws Exception
    */
   void openListeners() throws Exception {
+    LOG.debug("opening listeners: {}", listeners);
     for (ServerConnector listener : listeners) {
       if (listener.getLocalPort() != -1 && listener.getLocalPort() != -2) {
         // This listener is either started externally or has been bound or was
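The getWebAppsPath() change above makes the lookup prefer a source-tree webapps directory during development and only then fall back to a CLASSPATH resource. The following is a standalone sketch of that lookup order; the class and method names here are illustrative, not part of HttpServer2.

```java
import java.io.File;
import java.io.FileNotFoundException;
import java.net.MalformedURLException;
import java.net.URL;

public class WebAppsPathLookup {
  /**
   * Resolve the parent URL of "webapps/" + appName, trying the development
   * location first and the CLASSPATH second.
   */
  static String resolveWebAppsPath(String appName) throws FileNotFoundException {
    URL resourceUrl;
    File devLocation = new File("src/main/webapps", appName);
    if (devLocation.exists()) {
      try {
        resourceUrl = devLocation.getParentFile().toURI().toURL();
      } catch (MalformedURLException e) {
        throw new FileNotFoundException(
            "Malformed URL for dev webapps dir: " + e.getMessage());
      }
    } else {
      resourceUrl = WebAppsPathLookup.class.getClassLoader()
          .getResource("webapps/" + appName);
      if (resourceUrl == null) {
        throw new FileNotFoundException(
            "webapps/" + appName + " not found in CLASSPATH");
      }
    }
    String urlString = resourceUrl.toString();
    return urlString.substring(0, urlString.lastIndexOf('/'));
  }
}
```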
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/BoundedRangeFileInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/BoundedRangeFileInputStream.java
index e7f4c8319e3..050c15bc61f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/BoundedRangeFileInputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/BoundedRangeFileInputStream.java
@@ -28,7 +28,7 @@
  * BoundedRangeFileInputStream on top of the same FSDataInputStream and they
  * would not interfere with each other.
  */
-class BoundedRangeFileInputStream extends InputStream {
+public class BoundedRangeFileInputStream extends InputStream {
 
   private FSDataInputStream in;
   private long pos;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java
index f82f4df2e51..fa85ed77a1f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Compression.java
@@ -43,7 +43,7 @@
 /**
  * Compression related stuff.
  */
-final class Compression {
+public final class Compression {
   static final Logger LOG = LoggerFactory.getLogger(Compression.class);
 
   /**
@@ -75,7 +75,7 @@ public void flush() throws IOException {
   /**
    * Compression algorithms.
    */
-  enum Algorithm {
+  public enum Algorithm {
     LZO(TFile.COMPRESSION_LZO) {
       private transient boolean checked = false;
       private static final String defaultClazz =
@@ -348,7 +348,7 @@ public String getName() {
     }
   }
 
-  static Algorithm getCompressionAlgorithmByName(String compressName) {
+  public static Algorithm getCompressionAlgorithmByName(String compressName) {
     Algorithm[] algos = Algorithm.class.getEnumConstants();
 
     for (Algorithm a : algos) {
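Because Compression, its Algorithm enum and getCompressionAlgorithmByName() are widened to public above, code outside the org.apache.hadoop.io.file.tfile package can now look up a codec by name. A minimal sketch follows, assuming the "gz" codec name is one of the names registered by the enum (an assumption not shown in this hunk).

```java
import org.apache.hadoop.io.file.tfile.Compression;

public class CompressionLookup {
  public static void main(String[] args) {
    // Resolve an algorithm by its registered name; an unknown name is
    // rejected by the lookup. "gz" is assumed to be a registered codec name.
    Compression.Algorithm algo =
        Compression.getCompressionAlgorithmByName("gz");
    System.out.println("Resolved codec: " + algo.getName());
  }
}
```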
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/SimpleBufferedOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/SimpleBufferedOutputStream.java
index a26a02d5769..0a194a3ce60 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/SimpleBufferedOutputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/SimpleBufferedOutputStream.java
@@ -25,7 +25,7 @@
  * A simplified BufferedOutputStream with borrowed buffer, and allow users to
  * see how much data have been buffered.
  */
-class SimpleBufferedOutputStream extends FilterOutputStream {
+public class SimpleBufferedOutputStream extends FilterOutputStream {
   protected byte buf[]; // the borrowed buffer
   protected int count = 0; // bytes used in buffer.
 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CpuTimeTracker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CpuTimeTracker.java
index 3f17c9ab113..b4ebe861c1f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CpuTimeTracker.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CpuTimeTracker.java
@@ -97,7 +97,9 @@ public long getCumulativeCpuTime() {
    * @param newTime new sample time
    */
   public void updateElapsedJiffies(BigInteger elapsedJiffies, long newTime) {
-    cumulativeCpuTime = elapsedJiffies.multiply(jiffyLengthInMillis);
+    BigInteger newValue = elapsedJiffies.multiply(jiffyLengthInMillis);
+    cumulativeCpuTime = newValue.compareTo(cumulativeCpuTime) >= 0 ?
+        newValue : cumulativeCpuTime;
     sampleTime = newTime;
   }
 
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 8c8507c961d..538df97fbc2 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -871,30 +871,6 @@
   File space usage statistics refresh interval in msec.
 
 
-
-  fs.s3n.buffer.dir
-  ${hadoop.tmp.dir}/s3n
-  Determines where on the local filesystem the s3n:// filesystem
-  should store files before sending them to S3
-  (or after retrieving them from S3).
-  
-
-
-
-  fs.s3n.maxRetries
-  4
-  The maximum number of retries for reading or writing files to S3,
-  before we signal failure to the application.
-  
-
-
-
-  fs.s3n.sleepTimeSeconds
-  10
-  The number of seconds to sleep between each S3 retry.
-  
-
-
 
   fs.swift.impl
   org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem
@@ -911,56 +887,6 @@
   
 
 
-
-  fs.s3n.awsAccessKeyId
-  AWS access key ID used by S3 native file system.
-
-
-
-  fs.s3n.awsSecretAccessKey
-  AWS secret key used by S3 native file system.
-
-
-
-  fs.s3n.block.size
-  67108864
-  Block size to use when reading files using the native S3
-  filesystem (s3n: URIs).
-
-
-
-  fs.s3n.multipart.uploads.enabled
-  false
-  Setting this property to true enables multiple uploads to
-  native S3 filesystem. When uploading a file, it is split into blocks
-  if the size is larger than fs.s3n.multipart.uploads.block.size.
-  
-
-
-
-  fs.s3n.multipart.uploads.block.size
-  67108864
-  The block size for multipart uploads to native S3 filesystem.
-  Default size is 64MB.
-  
-
-
-
-  fs.s3n.multipart.copy.block.size
-  5368709120
-  The block size for multipart copy in native S3 filesystem.
-  Default size is 5GB.
-  
-
-
-
-  fs.s3n.server-side-encryption-algorithm
-  
-  Specify a server-side encryption algorithm for S3.
-  Unset by default, and the only other currently allowable value is AES256.
-  
-
-
 
   fs.s3a.access.key
   AWS access key ID used by S3A file system. Omit for IAM role-based or provider-based authentication.
@@ -1234,22 +1160,12 @@
     uploads to.
 
 
-
-  fs.s3a.fast.upload
-  false
-  
-    Use the incremental block-based fast upload mechanism with
-    the buffering mechanism set in fs.s3a.fast.upload.buffer.
-  
-
-
 
   fs.s3a.fast.upload.buffer
   disk
   
-    The buffering mechanism to use when using S3A fast upload
-    (fs.s3a.fast.upload=true). Values: disk, array, bytebuffer.
-    This configuration option has no effect if fs.s3a.fast.upload is false.
+    The buffering mechanism to use for data being written.
+    Values: disk, array, bytebuffer.
 
     "disk" will use the directories listed in fs.s3a.buffer.dir as
     the location(s) to save data prior to being uploaded.
@@ -1428,20 +1344,16 @@
   The implementation class of the S3A AbstractFileSystem.
 
 
-
-
-  fs.ozfs.impl
-  org.apache.hadoop.fs.ozone.OzoneFileSystem
-  The implementation class of the Ozone FileSystem.
-
+  
+    fs.s3a.list.version
+    2
+    
+      Select which version of the S3 SDK's List Objects API to use.  Currently
+      supported values are 2 (default) and 1 (older API).
+    
+  
 
-
-  fs.AbstractFileSystem.ozfs.impl
-  org.apache.hadoop.fs.ozone.OzFs
-  The implementation class of the OzFs AbstractFileSystem.
-
-
-
+  
 
   fs.wasb.impl
   org.apache.hadoop.fs.azure.NativeAzureFileSystem
@@ -1547,7 +1459,21 @@
 
 
 
-
+  
+  
+    fs.ozfs.impl
+    org.apache.hadoop.fs.ozone.OzoneFileSystem
+    The implementation class of the Ozone FileSystem.
+  
+
+  
+    fs.AbstractFileSystem.ozfs.impl
+    org.apache.hadoop.fs.ozone.OzFs
+    The implementation class of the OzFs AbstractFileSystem.
+  
+
+
+  
 
 
   ipc.client.idlethreshold
@@ -1807,42 +1733,6 @@
   Replication factor
 
 
-
-
-
-  s3native.stream-buffer-size
-  4096
-  The size of buffer to stream files.
-  The size of this buffer should probably be a multiple of hardware
-  page size (4096 on Intel x86), and it determines how much data is
-  buffered during read and write operations.
-
-
-
-  s3native.bytes-per-checksum
-  512
-  The number of bytes per checksum.  Must not be larger than
-  s3native.stream-buffer-size
-
-
-
-  s3native.client-write-packet-size
-  65536
-  Packet size for clients to write
-
-
-
-  s3native.blocksize
-  67108864
-  Block size
-
-
-
-  s3native.replication
-  3
-  Replication factor
-
-
 
 
   ftp.stream-buffer-size
@@ -1977,38 +1867,38 @@
 
 
 
-  Enable/disable the cross-origin (CORS) filter.
   hadoop.http.cross-origin.enabled
   false
+  Enable/disable the cross-origin (CORS) filter.
 
 
 
+  hadoop.http.cross-origin.allowed-origins
+  *
   Comma separated list of origins that are allowed for web
     services needing cross-origin (CORS) support. Wildcards (*) and patterns
     allowed
-  hadoop.http.cross-origin.allowed-origins
-  *
 
 
 
-  Comma separated list of methods that are allowed for web
-    services needing cross-origin (CORS) support.
   hadoop.http.cross-origin.allowed-methods
   GET,POST,HEAD
+  Comma separated list of methods that are allowed for web
+    services needing cross-origin (CORS) support.
 
 
 
-  Comma separated list of headers that are allowed for web
-    services needing cross-origin (CORS) support.
   hadoop.http.cross-origin.allowed-headers
   X-Requested-With,Content-Type,Accept,Origin
+  Comma separated list of headers that are allowed for web
+    services needing cross-origin (CORS) support.
 
 
 
-  The number of seconds a pre-flighted request can be cached
-    for web services needing cross-origin (CORS) support.
   hadoop.http.cross-origin.max-age
   1800
+  The number of seconds a pre-flighted request can be cached
+    for web services needing cross-origin (CORS) support.
 
 
 
@@ -2099,13 +1989,13 @@
 
 
 
+  hadoop.http.staticuser.user
+  dr.who
   
     The user name to filter as, on static web filters
     while rendering content. An example use is the HDFS
     web UI (user to be used for browsing files).
   
-  hadoop.http.staticuser.user
-  dr.who
 
 
 
@@ -2453,34 +2343,6 @@
   
 
 
-
-  hadoop.security.kms.client.failover.sleep.base.millis
-  100
-  
-    Expert only. The time to wait, in milliseconds, between failover
-    attempts increases exponentially as a function of the number of
-    attempts made so far, with a random factor of +/- 50%. This option
-    specifies the base value used in the failover calculation. The
-    first failover will retry immediately. The 2nd failover attempt
-    will delay at least hadoop.security.client.failover.sleep.base.millis
-    milliseconds. And so on.
-  
-
-
-
-  hadoop.security.kms.client.failover.sleep.max.millis
-  2000
-  
-    Expert only. The time to wait, in milliseconds, between failover
-    attempts increases exponentially as a function of the number of
-    attempts made so far, with a random factor of +/- 50%. This option
-    specifies the maximum value to wait between failovers.
-    Specifically, the time between two failover attempts will not
-    exceed +/- 50% of hadoop.security.client.failover.sleep.max.millis
-    milliseconds.
-  
-
-
  
   ipc.server.max.connections
   0
@@ -2496,6 +2358,8 @@
   
 
   
+    hadoop.registry.rm.enabled
+    false
     
       Is the registry enabled in the YARN Resource Manager?
 
@@ -2507,50 +2371,50 @@
       If false, the paths must be created by other means,
       and no automatic cleanup of service records will take place.
     
-    hadoop.registry.rm.enabled
-    false
   
 
   
+    hadoop.registry.zk.root
+    /registry
     
       The root zookeeper node for the registry
     
-    hadoop.registry.zk.root
-    /registry
   
 
   
+    hadoop.registry.zk.session.timeout.ms
+    60000
     
       Zookeeper session timeout in milliseconds
     
-    hadoop.registry.zk.session.timeout.ms
-    60000
   
 
   
+    hadoop.registry.zk.connection.timeout.ms
+    15000
     
       Zookeeper connection timeout in milliseconds
     
-    hadoop.registry.zk.connection.timeout.ms
-    15000
   
 
   
+    hadoop.registry.zk.retry.times
+    5
     
       Zookeeper connection retry count before failing
     
-    hadoop.registry.zk.retry.times
-    5
   
 
   
-    
-    
     hadoop.registry.zk.retry.interval.ms
     1000
+    
+    
   
 
   
+    hadoop.registry.zk.retry.ceiling.ms
+    60000
     
       Zookeeper retry limit in milliseconds, during
       exponential backoff.
@@ -2560,20 +2424,20 @@
       with the backoff policy, result in a long retry
       period
     
-    hadoop.registry.zk.retry.ceiling.ms
-    60000
   
 
   
+    hadoop.registry.zk.quorum
+    localhost:2181
     
       List of hostname:port pairs defining the
       zookeeper quorum binding for the registry
     
-    hadoop.registry.zk.quorum
-    localhost:2181
   
 
   
+    hadoop.registry.secure
+    false
     
       Key to set if the registry is secure. Turning it on
       changes the permissions policy from "open access"
@@ -2581,11 +2445,11 @@
       a user adding one or more auth key pairs down their
       own tree.
     
-    hadoop.registry.secure
-    false
   
 
   
+    hadoop.registry.system.acls
+    sasl:yarn@, sasl:mapred@, sasl:hdfs@
     
       A comma separated list of Zookeeper ACL identifiers with
       system access to the registry in a secure cluster.
@@ -2595,11 +2459,11 @@
       If there is an "@" at the end of a SASL entry it
       instructs the registry client to append the default kerberos domain.
     
-    hadoop.registry.system.acls
-    sasl:yarn@, sasl:mapred@, sasl:hdfs@
   
 
   
+    hadoop.registry.kerberos.realm
+    
     
       The kerberos realm: used to set the realm of
       system principals which do not declare their realm,
@@ -2611,26 +2475,24 @@
       If neither are known and the realm is needed, then the registry
       service/client will fail.
     
-    hadoop.registry.kerberos.realm
-    
   
 
   
+    hadoop.registry.jaas.context
+    Client
     
       Key to define the JAAS context. Used in secure
       mode
     
-    hadoop.registry.jaas.context
-    Client
   
 
   
+    hadoop.shell.missing.defaultFs.warning
+    false
     
       Enable hdfs shell commands to display warnings if (fs.defaultFS) property
       is not set.
     
-    hadoop.shell.missing.defaultFs.warning
-    false
   
 
   
@@ -2660,13 +2522,13 @@
   
 
   
+    hadoop.http.logs.enabled
+    true
     
       Enable the "/logs" endpoint on all Hadoop daemons, which serves local
       logs, but may be considered a security risk due to it listing the contents
       of a directory.
     
-    hadoop.http.logs.enabled
-    true
   
 
   
@@ -2721,8 +2583,7 @@
       fs.adl.oauth2.credential, and fs.adl.oauth2.refresh.url.
       The RefreshToken type requires property fs.adl.oauth2.client.id and
       fs.adl.oauth2.refresh.token.
-      The MSI type requires properties fs.adl.oauth2.msi.port and
-      fs.adl.oauth2.msi.tenantguid.
+      The MSI type reads the optional property fs.adl.oauth2.msi.port, if specified.
       The DeviceCode type requires property
       fs.adl.oauth2.devicecode.clientapp.id.
       The Custom type requires property fs.adl.oauth2.access.token.provider.
@@ -2766,17 +2627,8 @@
     
     
       The localhost port for the MSI token service. This is the port specified
-      when creating the Azure VM.
-      Used by MSI token provider.
-    
-  
-
-  
-    fs.adl.oauth2.msi.tenantguid
-    
-    
-      The tenant guid for the Azure AAD tenant under which the azure data lake
-      store account is created.
+      when creating the Azure VM. The default, if this setting is not specified,
+      is 50342.
       Used by MSI token provider.
     
   
@@ -2841,48 +2693,48 @@
   
 
   
-    Host:Port of the ZooKeeper server to be used.
-    
     hadoop.zk.address
     
+    Host:Port of the ZooKeeper server to be used.
+    
   
 
   
-    Number of tries to connect to ZooKeeper.
     hadoop.zk.num-retries
     1000
+    Number of tries to connect to ZooKeeper.
   
 
   
-    Retry interval in milliseconds when connecting to ZooKeeper.
-    
     hadoop.zk.retry-interval-ms
     1000
+    Retry interval in milliseconds when connecting to ZooKeeper.
+    
   
 
   
+    hadoop.zk.timeout-ms
+    10000
     ZooKeeper session timeout in milliseconds. Session expiration
     is managed by the ZooKeeper cluster itself, not by the client. This value is
     used by the cluster to determine when the client's session expires.
     Expirations happens when the cluster does not hear from the client within
     the specified session timeout period (i.e. no heartbeat).
-    hadoop.zk.timeout-ms
-    10000
   
 
   
-    ACL's to be used for ZooKeeper znodes.
     hadoop.zk.acl
     world:anyone:rwcda
+    ACL's to be used for ZooKeeper znodes.
   
 
   
+    hadoop.zk.auth
     
         Specify the auths to be used for the ACL's specified in hadoop.zk.acl.
         This takes a comma-separated list of authentication mechanisms, each of the
         form 'scheme:auth' (the same syntax used for the 'addAuth' command in
         the ZK CLI).
     
-    hadoop.zk.auth
   
 
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md b/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md
index 05b18b59298..4fa8c027992 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/Compatibility.md
@@ -20,109 +20,276 @@ Apache Hadoop Compatibility
 Purpose
 -------
 
-This document captures the compatibility goals of the Apache Hadoop project. The different types of compatibility between Hadoop releases that affects Hadoop developers, downstream projects, and end-users are enumerated. For each type of compatibility we:
+This document captures the compatibility goals of the Apache Hadoop project.
+The different types of compatibility between Hadoop releases that affect
+Hadoop developers, downstream projects, and end-users are enumerated. For each
+type of compatibility this document will:
 
 * describe the impact on downstream projects or end-users
 * where applicable, call out the policy adopted by the Hadoop developers when incompatible changes are permitted.
 
+All Hadoop interfaces are classified according to the intended audience and
+stability in order to maintain compatibility with previous releases. See the
+[Hadoop Interface Taxonomy](./InterfaceClassification.html) for details
+about the classifications.
+
+### Target Audience
+
+This document is intended for consumption by the Hadoop developer community.
+This document describes the lens through which changes to the Hadoop project
+should be viewed. In order for end users and third party developers to have
+confidence about cross-release compatibility, the developer community must
+ensure that development efforts adhere to these policies. It is the
+responsibility of the project committers to validate that all changes either
+maintain compatibility or are explicitly marked as incompatible.
+
+Within a component Hadoop developers are free to use Private and Limited Private
+APIs, but when using components from a different module Hadoop developers
+should follow the same guidelines as third-party developers: do not
+use Private or Limited Private (unless explicitly allowed) interfaces and
+prefer instead Stable interfaces to Evolving or Unstable interfaces where
+possible. Where not possible, the preferred solution is to expand the audience
+of the API rather than introducing or perpetuating an exception to these
+compatibility guidelines. When working within a Maven module Hadoop developers
+should observe where possible the same level of restraint with regard to
+using components located in other Maven modules.
+
+Above all, Hadoop developers must be mindful of the impact of their changes.
+Stable interfaces must not change between major releases. Evolving interfaces
+must not change between minor releases. New classes and components must be
+labeled appropriately for audience and stability. See the
+[Hadoop Interface Taxonomy](./InterfaceClassification.html) for details about
+when the various labels are appropriate. As a general rule, all new interfaces
+and APIs should have the most limited labels (e.g. Private Unstable) that will
+not inhibit the intent of the interface or API.
+
+### Notational Conventions
+
+The key words "MUST" "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
+"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" are to be interpreted as
+described in [RFC 2119](http://tools.ietf.org/html/rfc2119).
+
+Deprecation
+-----------
+
+The Java API provides a @Deprecated annotation to mark an API element as
+flagged for removal. The standard meaning of the annotation is that the
+API element should not be used and may be removed in a later version.
+
+In all cases removing an element from an API is an incompatible
+change. In the case of [Stable](./InterfaceClassification.html#Stable) APIs,
+the change cannot be made between minor releases within the same major
+version. In addition, to allow consumers of the API time to adapt to the change,
+the API element to be removed should be marked as deprecated for a full major
+release before it is removed. For example, if a method is marked as deprecated
+in Hadoop 2.8, it cannot be removed until Hadoop 4.0.
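+
+For illustration only, the following sketch (with hypothetical class and
+method names) shows how a deprecated API element is typically marked with
+both the @Deprecated annotation and a Javadoc tag pointing at its
+replacement, consistent with the policy below:
+
+```java
+/** Hypothetical Public/Stable API class, shown only to illustrate the policy. */
+public class ClusterClient {
+
+  /**
+   * @deprecated since 2.8, use {@link #listNodes()} instead. Under the
+   *             policy described here this method cannot be removed before 4.0.
+   */
+  @Deprecated
+  public String[] getNodes() {
+    return listNodes();       // delegate to the replacement method
+  }
+
+  /** Replacement API introduced alongside the deprecation. */
+  public String[] listNodes() {
+    return new String[0];     // placeholder body for this sketch
+  }
+}
+```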
+
+### Policy
+
+[Stable](./InterfaceClassification.html#Stable) API elements MUST NOT be removed
+until they have been marked as deprecated (through the @Deprecated annotation or
+other appropriate documentation) for a full major release. In the case that an
+API element was introduced as deprecated (to indicate that it is a temporary
+measure that is intended to be removed) the API element MAY be removed in the
+following major release. When modifying a
+[Stable](./InterfaceClassification.html#Stable) API, developers SHOULD prefer
+introducing a new method or endpoint and deprecating the existing one to making
+incompatible changes to the method or endpoint.
+
 Compatibility types
 -------------------
 
 ### Java API
 
-Hadoop interfaces and classes are annotated to describe the intended audience and stability in order to maintain compatibility with previous releases. See [Hadoop Interface Classification](./InterfaceClassification.html) for details.
+Developers SHOULD annotate all Hadoop interfaces and classes with the
+@InterfaceAudience and @InterfaceStability annotations to describe the
+intended audience and stability. Annotations may be at the package, class, or
+member variable or method level. Member variable and method annotations SHALL
+override class annotations, and class annotations SHALL override package
+annotations. A package, class, or member variable or method that is not
+annotated SHALL be interpreted as implicitly
+[Private](./InterfaceClassification.html#Private) and
+[Unstable](./InterfaceClassification.html#Unstable).
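+
+As a sketch only (the class below is hypothetical; it assumes the annotations
+may be applied at the method level, as described above), member variable and
+method annotations override the class-level ones:
+
+```java
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+// The class as a whole is declared Public and Evolving.
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class RecordStore {
+
+  /** Inherits the class-level Public/Evolving classification. */
+  public void put(String key, byte[] value) {
+    // ...
+  }
+
+  /**
+   * The member-level annotations override the class-level ones, so this
+   * method is Private/Unstable despite the Public class annotation.
+   */
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
+  public void compactInternal() {
+    // ...
+  }
+}
+```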
 
-* InterfaceAudience: captures the intended audience, possible values are Public (for end users and external projects), LimitedPrivate (for other Hadoop components, and closely related projects like YARN, MapReduce, HBase etc.), and Private (for intra component use).
-* InterfaceStability: describes what types of interface changes are permitted. Possible values are Stable, Evolving, Unstable, and Deprecated.
+* @InterfaceAudience captures the intended audience. Possible values are
+[Public](./InterfaceClassification.html#Public) (for end users and external
+projects), Limited[Private](./InterfaceClassification.html#Private) (for other
+Hadoop components, and closely related projects like YARN, MapReduce, HBase
+etc.), and [Private](./InterfaceClassification.html#Private)
+(for intra component use).
+* @InterfaceStability describes what types of interface changes are permitted. Possible values are [Stable](./InterfaceClassification.html#Stable), [Evolving](./InterfaceClassification.html#Evolving), and [Unstable](./InterfaceClassification.html#Unstable).
+* @Deprecated notes that the package, class, or member variable or method could potentially be removed in the future and should not be used.
 
 #### Use Cases
 
-* Public-Stable API compatibility is required to ensure end-user programs and downstream projects continue to work without modification.
-* LimitedPrivate-Stable API compatibility is required to allow upgrade of individual components across minor releases.
-* Private-Stable API compatibility is required for rolling upgrades.
+* [Public](./InterfaceClassification.html#Public)-[Stable](./InterfaceClassification.html#Stable) API compatibility is required to ensure end-user programs and downstream projects continue to work without modification.
+* [Public](./InterfaceClassification.html#Public)-[Evolving](./InterfaceClassification.html#Evolving) API compatibility is useful to make functionality available for consumption before it is fully baked.
+* Limited Private-[Stable](./InterfaceClassification.html#Stable) API compatibility is required to allow upgrade of individual components across minor releases.
+* [Private](./InterfaceClassification.html#Private)-[Stable](./InterfaceClassification.html#Stable) API compatibility is required for rolling upgrades.
+* [Private](./InterfaceClassification.html#Private)-[Unstable](./InterfaceClassification.html#Unstable) API compatibility allows internal components to evolve rapidly without concern for downstream consumers, and is how most interfaces should be labeled.
 
 #### Policy
 
-* Public-Stable APIs must be deprecated for at least one major release prior to their removal in a major release.
-* LimitedPrivate-Stable APIs can change across major releases, but not within a major release.
-* Private-Stable APIs can change across major releases, but not within a major release.
-* Classes not annotated are implicitly "Private". Class members not annotated inherit the annotations of the enclosing class.
-* Note: APIs generated from the proto files need to be compatible for rolling-upgrades. See the section on wire-compatibility for more details. The compatibility policies for APIs and wire-communication need to go hand-in-hand to address this.
+The compatibility policy SHALL be determined by the relevant package, class, or
+member variable or method annotations.
 
-### Semantic compatibility
+Note: APIs generated from the proto files MUST be compatible for rolling
+upgrades. See the section on wire protocol compatibility for more details. The
+compatibility policies for APIs and wire protocols must therefore go hand
+in hand.
 
-Apache Hadoop strives to ensure that the behavior of APIs remains consistent over versions, though changes for correctness may result in changes in behavior. Tests and javadocs specify the API's behavior. The community is in the process of specifying some APIs more rigorously, and enhancing test suites to verify compliance with the specification, effectively creating a formal specification for the subset of behaviors that can be easily tested.
+#### Semantic compatibility
+
+Apache Hadoop strives to ensure that the behavior of APIs remains consistent
+over versions, though changes for correctness may result in changes in
+behavior. API behavior SHALL be specified by the JavaDoc API documentation
+where present and complete. When JavaDoc API documentation is not available,
+behavior SHALL be specified by the behavior expected by the related unit tests.
+In cases with no JavaDoc API documentation or unit test coverage, the expected
+behavior is presumed to be obvious and SHOULD be assumed to be the minimum
+functionality implied by the interface naming. The community is in the process
+of specifying some APIs more rigorously and enhancing test suites to verify
+compliance with the specification, effectively creating a formal specification
+for the subset of behaviors that can be easily tested.
+
+The behavior of any API MAY be changed to fix incorrect behavior according to
+the stability of the API, with such a change to be accompanied by updating
+existing documentation and tests and/or adding new documentation or tests.
+
+#### Java Binary compatibility for end-user applications i.e. Apache Hadoop ABI
+
+Apache Hadoop revisions SHOULD retain binary compatibility such that end-user
+applications continue to work without any modifications. Minor Apache Hadoop
+revisions within the same major revision MUST retain compatibility such that
+existing MapReduce applications (e.g. end-user applications and projects such
+as Apache Pig, Apache Hive, et al), existing YARN applications (e.g.
+end-user applications and projects such as Apache Spark, Apache Tez et al),
+and applications that access HDFS directly (e.g. end-user applications and
+projects such as Apache HBase, Apache Flume, et al) work unmodified and without
+recompilation when used with any Apache Hadoop cluster within the same major
+release as the original build target.
+
+For MapReduce applications in particular, i.e. applications using the
+org.apache.hadoop.mapred and/or org.apache.hadoop.mapreduce APIs, the developer
+community SHALL support binary compatibility across major releases. The
+MapReduce APIs SHALL be supported compatibly across major releases. See
+[Compatibility for MapReduce applications between hadoop-1.x and hadoop-2.x](../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html) for more details.
+
+Some applications may be affected by changes to disk layouts or other internal
+changes. See the sections that follow for policies on how incompatible
+changes to non-API interfaces are handled.
+
+### Native Dependencies
+
+Hadoop includes several native components, including compression, the
+container executor binary, and various native integrations. These native
+components introduce a set of native dependencies for Hadoop, both at compile
+time and at runtime, such as cmake, gcc, zlib, etc. This set of native
+dependencies is part of the Hadoop ABI.
 
 #### Policy
 
-The behavior of API may be changed to fix incorrect behavior, such a change to be accompanied by updating existing buggy tests or adding tests in cases there were none prior to the change.
+The minimum required versions of the native components on which Hadoop depends
+at compile time and/or runtime SHALL be considered
+[Stable](./InterfaceClassification.html#Stable). Changes to the minimum
+required versions MUST NOT increase between minor releases within a major
+version.
 
-### Wire compatibility
+### Wire Protocols
 
-Wire compatibility concerns data being transmitted over the wire between Hadoop processes. Hadoop uses Protocol Buffers for most RPC communication. Preserving compatibility requires prohibiting modification as described below. Non-RPC communication should be considered as well, for example using HTTP to transfer an HDFS image as part of snapshotting or transferring MapTask output. The potential communications can be categorized as follows:
+Wire compatibility concerns data being transmitted "over the wire" between
+Hadoop processes. Hadoop uses
+[Protocol Buffers](https://developers.google.com/protocol-buffers/) for most
+RPC communication. Preserving compatibility requires prohibiting modification
+as described below. Non-RPC communication should be considered as well, for
+example using HTTP to transfer an HDFS image as part of snapshotting or
+transferring MapReduce map task output. The communications can be categorized as
+follows:
 
 * Client-Server: communication between Hadoop clients and servers (e.g., the HDFS client to NameNode protocol, or the YARN client to ResourceManager protocol).
-* Client-Server (Admin): It is worth distinguishing a subset of the Client-Server protocols used solely by administrative commands (e.g., the HAAdmin protocol) as these protocols only impact administrators who can tolerate changes that end users (which use general Client-Server protocols) can not.
+* Client-Server (Admin): It is worth distinguishing a subset of the Client-Server protocols used solely by administrative commands (e.g., the HAAdmin protocol) as these protocols only impact administrators who can tolerate changes that end users (which use general Client-Server protocols) cannot.
 * Server-Server: communication between servers (e.g., the protocol between the DataNode and NameNode, or NodeManager and ResourceManager)
 
-#### Use Cases
+#### Protocol Dependencies
 
-* Client-Server compatibility is required to allow users to continue using the old clients even after upgrading the server (cluster) to a later version (or vice versa). For example, a Hadoop 2.1.0 client talking to a Hadoop 2.3.0 cluster.
-* Client-Server compatibility is also required to allow users to upgrade the client before upgrading the server (cluster). For example, a Hadoop 2.4.0 client talking to a Hadoop 2.3.0 cluster. This allows deployment of client-side bug fixes ahead of full cluster upgrades. Note that new cluster features invoked by new client APIs or shell commands will not be usable. YARN applications that attempt to use new APIs (including new fields in data structures) that have not yet been deployed to the cluster can expect link exceptions.
-* Client-Server compatibility is also required to allow upgrading individual components without upgrading others. For example, upgrade HDFS from version 2.1.0 to 2.2.0 without upgrading MapReduce.
-* Server-Server compatibility is required to allow mixed versions within an active cluster so the cluster may be upgraded without downtime in a rolling fashion.
+The components of Apache Hadoop may have dependencies that include their own
+protocols, such as Zookeeper, S3, Kerberos, etc. These protocol dependencies
+SHALL be treated as internal protocols and governed by the same policy.
+
+#### Transports
+
+In addition to compatibility of the protocols themselves, maintaining
+cross-version communications requires that the transports supported also be
+stable. The most likely source of transport changes stems from secure
+transports, such as SSL. Upgrading a service from SSLv2 to SSLv3 may break
+existing SSLv2 clients. The minimum supported major version of any transport
+MUST NOT increase across minor releases within a major version.
+
+Service ports are considered as part of the transport mechanism. Fixed
+service port numbers MUST be kept consistent to prevent breaking clients.
 
 #### Policy
 
-* Both Client-Server and Server-Server compatibility is preserved within a major release. (Different policies for different categories are yet to be considered.)
-* Compatibility can be broken only at a major release, though breaking compatibility even at major releases has grave consequences and should be discussed in the Hadoop community.
-* Hadoop protocols are defined in .proto (ProtocolBuffers) files. Client-Server protocols and Server-Server protocol .proto files are marked as stable. When a .proto file is marked as stable it means that changes should be made in a compatible fashion as described below:
-    * The following changes are compatible and are allowed at any time:
-        * Add an optional field, with the expectation that the code deals with the field missing due to communication with an older version of the code.
-        * Add a new rpc/method to the service
-        * Add a new optional request to a Message
-        * Rename a field
-        * Rename a .proto file
-        * Change .proto annotations that effect code generation (e.g. name of java package)
-    * The following changes are incompatible but can be considered only at a major release
-        * Change the rpc/method name
-        * Change the rpc/method parameter type or return type
-        * Remove an rpc/method
-        * Change the service name
-        * Change the name of a Message
-        * Modify a field type in an incompatible way (as defined recursively)
-        * Change an optional field to required
-        * Add or delete a required field
-        * Delete an optional field as long as the optional field has reasonable defaults to allow deletions
-    * The following changes are incompatible and hence never allowed
-        * Change a field id
-        * Reuse an old field that was previously deleted.
-        * Field numbers are cheap and changing and reusing is not a good idea.
+Hadoop wire protocols are defined in .proto (ProtocolBuffers) files.
+Client-Server and Server-Server protocols SHALL be classified according to the
+audience and stability classifications noted in their .proto files. In cases
+where no classifications are present, the protocols SHOULD be assumed to be
+[Private](./InterfaceClassification.html#Private) and
+[Stable](./InterfaceClassification.html#Stable).
 
-### Java Binary compatibility for end-user applications i.e. Apache Hadoop ABI
+The following changes to a .proto file SHALL be considered compatible:
 
-As Apache Hadoop revisions are upgraded end-users reasonably expect that their applications should continue to work without any modifications. This is fulfilled as a result of supporting API compatibility, Semantic compatibility and Wire compatibility.
+* Add an optional field, with the expectation that the code deals with the field missing due to communication with an older version of the code (see the sketch after this list)
+* Add a new rpc/method to the service
+* Add a new optional request to a Message
+* Rename a field
+* Rename a .proto file
+* Change .proto annotations that affect code generation (e.g. name of java package)
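+
+As a sketch of the first compatible change above (all names here are
+hypothetical, and the stand-in class below mimics the hasFoo()/getFoo()
+accessors that protobuf generates for proto2 optional fields), client code is
+expected to check for the presence of a newly added optional field and fall
+back gracefully when talking to an older server:
+
+```java
+/** Minimal stand-in for a protobuf-generated response message. */
+class GetStatusResponse {
+  private final Long cacheUsed;          // null when the field was absent
+
+  GetStatusResponse(Long cacheUsed) { this.cacheUsed = cacheUsed; }
+
+  boolean hasCacheUsed() { return cacheUsed != null; }
+  long getCacheUsed()    { return cacheUsed; }
+}
+
+class StatusClient {
+  /** Tolerate responses from older servers that never set the new field. */
+  long cacheUsed(GetStatusResponse response) {
+    return response.hasCacheUsed() ? response.getCacheUsed() : 0L;
+  }
+}
+```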
 
-However, Apache Hadoop is a very complex, distributed system and services a very wide variety of use-cases. In particular, Apache Hadoop MapReduce is a very, very wide API; in the sense that end-users may make wide-ranging assumptions such as layout of the local disk when their map/reduce tasks are executing, environment variables for their tasks etc. In such cases, it becomes very hard to fully specify, and support, absolute compatibility.
+The following changes to a .proto file SHALL be considered incompatible:
 
-#### Use cases
+* Change an rpc/method name
+* Change an rpc/method parameter type or return type
+* Remove an rpc/method
+* Change the service name
+* Change the name of a Message
+* Modify a field type in an incompatible way (as defined recursively)
+* Change an optional field to required
+* Add or delete a required field
+* Delete an optional field as long as the optional field has reasonable defaults to allow deletions
 
-* Existing MapReduce applications, including jars of existing packaged end-user applications and projects such as Apache Pig, Apache Hive, Cascading etc. should work unmodified when pointed to an upgraded Apache Hadoop cluster within a major release.
-* Existing YARN applications, including jars of existing packaged end-user applications and projects such as Apache Tez etc. should work unmodified when pointed to an upgraded Apache Hadoop cluster within a major release.
-* Existing applications which transfer data in/out of HDFS, including jars of existing packaged end-user applications and frameworks such as Apache Flume, should work unmodified when pointed to an upgraded Apache Hadoop cluster within a major release.
+The following changes to a .proto file SHALL be considered incompatible and
+MUST NOT be made at any time:
 
-#### Policy
+* Change a field id
+* Reuse an old field that was previously deleted.
 
-* Existing MapReduce, YARN & HDFS applications and frameworks should work unmodified within a major release i.e. Apache Hadoop ABI is supported.
-* A very minor fraction of applications maybe affected by changes to disk layouts etc., the developer community will strive to minimize these changes and will not make them within a minor version. In more egregious cases, we will consider strongly reverting these breaking changes and invalidating offending releases if necessary.
-* In particular for MapReduce applications, the developer community will try our best to support providing binary compatibility across major releases e.g. applications using org.apache.hadoop.mapred.
-* APIs are supported compatibly across hadoop-1.x and hadoop-2.x. See [Compatibility for MapReduce applications between hadoop-1.x and hadoop-2.x](../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html) for more details.
+Hadoop wire protocols that are not defined via .proto files SHOULD be considered
+to be [Private](./InterfaceClassification.html#Private) and
+[Stable](./InterfaceClassification.html#Stable).
+
+In addition to the limitations imposed by being
+[Stable](./InterfaceClassification.html#Stable), Hadoop's wire protocols
+MUST also be forward compatible across minor releases within a major version
+according to the following:
+
+* Client-Server compatibility MUST be maintained so as to allow users to continue using older clients even after upgrading the server (cluster) to a later version (or vice versa). For example, a Hadoop 2.1.0 client talking to a Hadoop 2.3.0 cluster.
+* Client-Server compatibility MUST be maintained so as to allow users to upgrade the client before upgrading the server (cluster). For example, a Hadoop 2.4.0 client talking to a Hadoop 2.3.0 cluster. This allows deployment of client-side bug fixes ahead of full cluster upgrades. Note that new cluster features invoked by new client APIs or shell commands will not be usable. YARN applications that attempt to use new APIs (including new fields in data structures) that have not yet been deployed to the cluster can expect link exceptions.
+* Client-Server compatibility MUST be maintained so as to allow upgrading individual components without upgrading others. For example, upgrade HDFS from version 2.1.0 to 2.2.0 without upgrading MapReduce.
+* Server-Server compatibility MUST be maintained so as to allow mixed versions within an active cluster so the cluster may be upgraded without downtime in a rolling fashion.
+
+New transport mechanisms MUST only be introduced with minor or major version
+changes. Existing transport mechanisms MUST continue to be supported across
+minor versions within a major version. Service port numbers MUST remain
+consistent across minor version numbers within a major version.
 
 ### REST APIs
 
-REST API compatibility corresponds to both the requests (URLs) and responses to each request (content, which may contain other URLs). Hadoop REST APIs are specifically meant for stable use by clients across releases, even major ones. The following are the exposed REST APIs:
+REST API compatibility applies to the REST endpoints (URLs) and response data
+format. Hadoop REST APIs are specifically meant for stable use by clients across
+releases, even major ones. The following is a non-exhaustive list of the
+exposed REST APIs:
 
-* [WebHDFS](../hadoop-hdfs/WebHDFS.html) - Stable
+* [WebHDFS](../hadoop-hdfs/WebHDFS.html)
 * [ResourceManager](../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html)
 * [NodeManager](../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html)
 * [MR Application Master](../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html)
@@ -130,134 +297,390 @@ REST API compatibility corresponds to both the requests (URLs) and responses to
 * [Timeline Server v1 REST API](../../hadoop-yarn/hadoop-yarn-site/TimelineServer.html)
 * [Timeline Service v2 REST API](../../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html)
 
+Each API has an API-specific version number. Any incompatible changes MUST
+increment the API version number.
+
 #### Policy
 
-The APIs annotated stable in the text above preserve compatibility across at least one major release, and maybe deprecated by a newer version of the REST API in a major release.
+The Hadoop REST APIs SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Evolving](./InterfaceClassification.html#Evolving). With respect to API version
+numbers, the Hadoop REST APIs SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Stable](./InterfaceClassification.html#Stable), i.e. no incompatible changes
+are allowed within an API version number.
+
+### Log Output
+
+The Hadoop daemons and CLIs produce log output via Log4j that is intended to
+aid administrators and developers in understanding and troubleshooting cluster
+behavior. Log messages are intended for human consumption, though automation
+use cases are also supported.
+
+#### Policy
+
+All log output SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Evolving](./InterfaceClassification.html#Evolving).
+
+### Audit Log Output
+
+Several components have audit logging systems that record system information in
+a machine readable format. Incompatible changes to that data format may break
+existing automation utilities. For the audit log, an incompatible change is
+any change to the format such that existing parsers can no longer parse the
+logs.
+
+#### Policy
+
+All audit log output SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Stable](./InterfaceClassification.html#Stable). Any change to the
+data format SHALL be considered an incompatible change.
 
 ### Metrics/JMX
 
-While the Metrics API compatibility is governed by Java API compatibility, the actual metrics exposed by Hadoop need to be compatible for users to be able to automate using them (scripts etc.). Adding additional metrics is compatible. Modifying (e.g. changing the unit or measurement) or removing existing metrics breaks compatibility. Similarly, changes to JMX MBean object names also break compatibility.
+While the Metrics API compatibility is governed by Java API compatibility, the
+Metrics data format exposed by Hadoop MUST be maintained as compatible for
+consumers of the data, e.g. for automation tasks.
 
 #### Policy
 
-Metrics should preserve compatibility within the major release.
+The data format exposed via Metrics SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Stable](./InterfaceClassification.html#Stable).
 
 ### File formats & Metadata
 
-User and system level data (including metadata) is stored in files of different formats. Changes to the metadata or the file formats used to store data/metadata can lead to incompatibilities between versions.
+User and system level data (including metadata) is stored in files of various
+formats. Changes to the metadata or the file formats used to store
+data/metadata can lead to incompatibilities between versions. Each class of file
+format is addressed below.
 
 #### User-level file formats
 
-Changes to formats that end-users use to store their data can prevent them from accessing the data in later releases, and hence it is highly important to keep those file-formats compatible. One can always add a "new" format improving upon an existing format. Examples of these formats include har, war, SequenceFileFormat etc.
+Changes to formats that end users use to store their data can prevent them from
+accessing the data in later releases, and hence it is important that these
+formats remain compatible.
+Examples of these formats include har, war, SequenceFileFormat, etc.
 
 ##### Policy
 
-* Non-forward-compatible user-file format changes are restricted to major releases. When user-file formats change, new releases are expected to read existing formats, but may write data in formats incompatible with prior releases. Also, the community shall prefer to create a new format that programs must opt in to instead of making incompatible changes to existing formats.
+User-level file formats SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Stable](./InterfaceClassification.html#Stable). User-level file
+format changes SHOULD be made forward compatible across major releases and MUST
+be made forward compatible within a major release. The developer community
+SHOULD prefer the creation of a new derivative file format to making
+incompatible changes to an existing file format. Such new file formats MUST be
+created as opt-in, meaning that users must be able to continue using the
+existing compatible format until and unless they explicitly opt in to using
+the new file format.
 
-#### System-internal file formats
+#### System-internal data schemas
 
-Hadoop internal data is also stored in files and again changing these formats can lead to incompatibilities. While such changes are not as devastating as the user-level file formats, a policy on when the compatibility can be broken is important.
+Hadoop internal data may also be stored in files or other data stores. Changing
+the schemas of these data stores can lead to incompatibilities.
 
 ##### MapReduce
 
 MapReduce uses formats like I-File to store MapReduce-specific data.
 
-##### Policy
+###### Policy
 
-MapReduce-internal formats like IFile maintain compatibility within a major release. Changes to these formats can cause in-flight jobs to fail and hence we should ensure newer clients can fetch shuffle-data from old servers in a compatible manner.
+All MapReduce-internal file formats, such as I-File format or the job history
+server's jhist file format, SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Stable](./InterfaceClassification.html#Stable).
 
 ##### HDFS Metadata
 
-HDFS persists metadata (the image and edit logs) in a particular format. Incompatible changes to either the format or the metadata prevent subsequent releases from reading older metadata. Such incompatible changes might require an HDFS "upgrade" to convert the metadata to make it accessible. Some changes can require more than one such "upgrades".
+HDFS persists metadata (the image and edit logs) in a private file format.
+Incompatible changes to either the format or the metadata prevent subsequent
+releases from reading older metadata. Incompatible changes MUST include a
+process by which existing metadata may be upgraded. Changes SHALL be
+allowed to require more than one upgrade. Incompatible changes MUST result in
+the metadata version number being incremented.
 
-Depending on the degree of incompatibility in the changes, the following potential scenarios can arise:
+Depending on the degree of incompatibility in the changes, the following
+potential scenarios can arise:
 
 * Automatic: The image upgrades automatically, no need for an explicit "upgrade".
 * Direct: The image is upgradable, but might require one explicit release "upgrade".
 * Indirect: The image is upgradable, but might require upgrading to intermediate release(s) first.
 * Not upgradeable: The image is not upgradeable.
 
-##### Policy
+HDFS data nodes store data in a private directory structure. The schema of that
+directory structure must remain stable to retain compatibility.
 
-* A release upgrade must allow a cluster to roll-back to the older version and its older disk format. The rollback needs to restore the original data, but not required to restore the updated data.
-* HDFS metadata changes must be upgradeable via any of the upgrade paths - automatic, direct or indirect.
-* More detailed policies based on the kind of upgrade are yet to be considered.
+###### Policy
+
+The HDFS metadata format SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Evolving](./InterfaceClassification.html#Evolving). Incompatible
+changes MUST include a process by which existing metadata may be upgraded. The
+upgrade process MUST allow the cluster metadata to be rolled back to the older
+version and its older disk format. The rollback MUST restore the original data
+but is not REQUIRED to restore the updated data. Any incompatible change
+to the format MUST result in the major version number of the schema being
+incremented.
+
+The data node directory format SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Evolving](./InterfaceClassification.html#Evolving).
+
+##### AWS S3A Guard Metadata
+
+For each operation in the Hadoop S3 client (s3a) that reads or modifies
+file metadata, a shadow copy of that file metadata is stored in a separate
+metadata store, which offers HDFS-like consistency for the metadata, and may
+also provide faster lookups for things like file status or directory listings.
+S3A guard tables are created with a version marker which indicates
+compatibility.
+
+###### Policy
+
+The S3A guard metadata schema SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Unstable](./InterfaceClassification.html#Unstable). Any incompatible change
+to the schema MUST result in the version number of the schema being incremented.
+
+##### YARN Resource Manager State Store
+
+The YARN resource manager stores information about the cluster state in an
+external state store for use in fail over and recovery. If the schema used for
+the state store data does not remain compatible, the resource manager will not
+be able to recover its state and will fail to start. The state store data
+schema includes a version number that indicates compatibility.
+
+###### Policy
+
+The YARN resource manager state store data schema SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Evolving](./InterfaceClassification.html#Evolving). Any incompatible change
+to the schema MUST result in the major version number of the schema being
+incremented. Any compatible change to the schema MUST result in the minor
+version number being incremented.
+
+##### YARN Node Manager State Store
+
+The YARN node manager stores information about the node state in an
+external state store for use in recovery. If the schema used for the state
+store data does not remain compatible, the node manager will not
+be able to recover its state and will fail to start. The state store data
+schema includes a version number that indicates compatibility.
+
+###### Policy
+
+The YARN node manager state store data schema SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Evolving](./InterfaceClassification.html#Evolving). Any incompatible change
+to the schema MUST result in the major version number of the schema being
+incremented. Any compatible change to the schema MUST result in the minor
+version number being incremented.
+
+##### YARN Federation State Store
+
+The YARN resource manager federation service stores information about the
+federated clusters, running applications, and routing policies in an
+external state store for use in replication and recovery. If the schema used
+for the state store data does not remain compatible, the federation service
+will fail to initialize. The state store data schema includes a version number
+that indicates compatibility.
+
+###### Policy
+
+The YARN federation service state store data schema SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Evolving](./InterfaceClassification.html#Evolving). Any incompatible change
+to the schema MUST result in the major version number of the schema being
+incremented. Any compatible change to the schema MUST result in the minor
+version number being incremented.
 
 ### Command Line Interface (CLI)
 
-The Hadoop command line programs may be used either directly via the system shell or via shell scripts. Changing the path of a command, removing or renaming command line options, the order of arguments, or the command return code and output break compatibility and may adversely affect users.
+The Hadoop command line programs may be used either directly via the system
+shell or via shell scripts. The CLIs include both the user-facing commands, such
+as the hdfs command or the yarn command, and the admin-facing commands, such as
+the scripts used to start and stop daemons.  Changing the path of a command,
+removing or renaming command line options, the order of arguments, or the
+command return codes and output break compatibility and adversely affect users.
 
 #### Policy
 
-CLI commands are to be deprecated (warning when used) for one major release before they are removed or incompatibly modified in a subsequent major release.
+All Hadoop CLI paths, usage, and output SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Stable](./InterfaceClassification.html#Stable).
+Note that the CLI output SHALL be considered distinct from the log output
+generated by the Hadoop CLIs. The latter SHALL be governed by the policy on log
+output. Note also that for CLI output, all changes SHALL be considered
+incompatible changes.
 
 ### Web UI
 
-Web UI, particularly the content and layout of web pages, changes could potentially interfere with attempts to screen scrape the web pages for information.
+Web UI, particularly the content and layout of web pages, changes could
+potentially interfere with attempts to screen scrape the web pages for
+information. The Hadoop Web UI pages, however, are not meant to be scraped, e.g.
+for automation purposes. Users are expected to use REST APIs to programmatically
+access cluster information.
 
 #### Policy
 
-Web pages are not meant to be scraped and hence incompatible changes to them are allowed at any time. Users are expected to use REST APIs to get any information.
+The Hadoop Web UI SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Unstable](./InterfaceClassification.html#Unstable).
 
 ### Hadoop Configuration Files
 
-Users use (1) Hadoop-defined properties to configure and provide hints to Hadoop and (2) custom properties to pass information to jobs. Hence, compatibility of config properties is two-fold:
-
-* Modifying key-names, units of values, and default values of Hadoop-defined properties.
-* Custom configuration property keys should not conflict with the namespace of Hadoop-defined properties. Typically, users should avoid using prefixes used by Hadoop: hadoop, io, ipc, fs, net, file, ftp, s3, kfs, ha, file, dfs, mapred, mapreduce, yarn.
+Users use Hadoop-defined properties to configure and provide hints to Hadoop and
+custom properties to pass information to jobs. Users are encouraged to avoid
+using custom configuration property names that conflict with the namespace of
+Hadoop-defined properties and should avoid using any prefixes used by Hadoop,
+e.g. hadoop, io, ipc, fs, net, file, ftp, s3, kfs, ha, dfs, mapred,
+mapreduce, and yarn.
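+
+As a minimal, hypothetical sketch (the property name
+"myapp.input.validation.enabled" is made up for illustration), a job can carry
+custom configuration under its own prefix using the standard Configuration API:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+public class CustomPropertyExample {
+  public static void main(String[] args) {
+    Configuration conf = new Configuration();
+
+    // Use an application-specific prefix ("myapp." here, chosen arbitrarily)
+    // rather than a Hadoop-reserved prefix such as "hadoop." or "fs.".
+    conf.set("myapp.input.validation.enabled", "true");
+
+    // Reads fall back to the supplied default when the key is unset.
+    boolean validate = conf.getBoolean("myapp.input.validation.enabled", false);
+    System.out.println("validation enabled: " + validate);
+  }
+}
+```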
 
 #### Policy
 
-* Hadoop-defined properties are to be deprecated at least for one major release before being removed. Modifying units for existing properties is not allowed.
-* The default values of Hadoop-defined properties can be changed across minor/major releases, but will remain the same across point releases within a minor release.
-* Currently, there is NO explicit policy regarding when new prefixes can be added/removed, and the list of prefixes to be avoided for custom configuration properties. However, as noted above, users should avoid using prefixes used by Hadoop: hadoop, io, ipc, fs, net, file, ftp, s3, kfs, ha, file, dfs, mapred, mapreduce, yarn.
+Hadoop-defined properties (names and meanings) SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Stable](./InterfaceClassification.html#Stable). The units implied by a
+Hadoop-defined property MUST NOT change, even
+across major versions. Default values of Hadoop-defined properties SHALL be
+considered [Public](./InterfaceClassification.html#Public) and
+[Evolving](./InterfaceClassification.html#Evolving).
+
+### Log4j Configuration Files
+
+The log output produced by Hadoop daemons and CLIs is governed by a set of
+configuration files. These files control the minimum level of log message that
+will be output by the various components of Hadoop, as well as where and how
+those messages are stored.
+
+#### Policy
+
+All Log4j configurations SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Evolving](./InterfaceClassification.html#Evolving).
 
 ### Directory Structure
 
-Source code, artifacts (source and tests), user logs, configuration files, output and job history are all stored on disk either local file system or HDFS. Changing the directory structure of these user-accessible files break compatibility, even in cases where the original path is preserved via symbolic links (if, for example, the path is accessed by a servlet that is configured to not follow symbolic links).
+Source code, artifacts (source and tests), user logs, configuration files,
+output, and job history are all stored on disk either local file system or HDFS.
+Changing the directory structure of these user-accessible files can break
+compatibility, even in cases where the original path is preserved via symbolic
+links (such as when the path is accessed by a servlet that is configured to
+not follow symbolic links).
 
 #### Policy
 
-* The layout of source code and build artifacts can change anytime, particularly so across major versions. Within a major version, the developers will attempt (no guarantees) to preserve the directory structure; however, individual files can be added/moved/deleted. The best way to ensure patches stay in sync with the code is to get them committed to the Apache source tree.
-* The directory structure of configuration files, user logs, and job history will be preserved across minor and point releases within a major release.
+The layout of source code and build artifacts SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Unstable](./InterfaceClassification.html#Unstable). Within a major version,
+the developer community SHOULD preserve the
+overall directory structure, though individual files MAY be added, moved, or
+deleted with no warning.
+
+The directory structure of configuration files, user logs, and job history SHALL
+be considered [Public](./InterfaceClassification.html#Public) and
+[Evolving](./InterfaceClassification.html#Evolving).
 
 ### Java Classpath
 
-User applications built against Hadoop might add all Hadoop jars (including Hadoop's library dependencies) to the application's classpath. Adding new dependencies or updating the version of existing dependencies may interfere with those in applications' classpaths.
+Hadoop provides several client artifacts that applications use to interact
+with the system. These artifacts typically have their own dependencies on
+common libraries. In the cases where these dependencies are exposed to
+end user applications or downstream consumers (i.e. not
+[shaded](https://stackoverflow.com/questions/13620281/what-is-the-maven-shade-plugin-used-for-and-why-would-you-want-to-relocate-java))
+changes to these dependencies can be disruptive. Developers are strongly
+encouraged to avoid exposing dependencies to clients by using techniques
+such as
+[shading](https://stackoverflow.com/questions/13620281/what-is-the-maven-shade-plugin-used-for-and-why-would-you-want-to-relocate-java).
+
+With regard to dependencies, adding a dependency is an incompatible change,
+whereas removing a dependency is a compatible change.
+
+Some user applications built against Hadoop may add all Hadoop JAR files
+(including Hadoop's library dependencies) to the application's classpath.
+Adding new dependencies or updating the versions of existing dependencies may
+interfere with those in applications' classpaths and hence their correct
+operation. Users are therefore discouraged from adopting this practice.
 
 #### Policy
 
-Currently, there is NO policy on when Hadoop's dependencies can change.
+The set of dependencies exposed by the Hadoop client artifacts SHALL be
+considered [Public](./InterfaceClassification.html#Public) and
+[Stable](./InterfaceClassification.html#Stable). Any dependencies that are not
+exposed to clients (either because they are shaded or only exist in non-client
+artifacts) SHALL be considered [Private](./InterfaceClassification.html#Private)
+and [Unstable](./InterfaceClassification.html#Unstable).
 
 ### Environment variables
 
-Users and related projects often utilize the exported environment variables (eg HADOOP\_CONF\_DIR), therefore removing or renaming environment variables is an incompatible change.
+Users and related projects often utilize the environment variables exported by
+Hadoop (e.g. HADOOP\_CONF\_DIR). Removing or renaming environment variables can
+therefore impact end user applications.
 
 #### Policy
 
-Currently, there is NO policy on when the environment variables can change. Developers try to limit changes to major releases.
+The environment variables consumed by Hadoop and the environment variables made
+accessible to applications through YARN SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Evolving](./InterfaceClassification.html#Evolving).
+The developer community SHOULD limit changes to major releases.
 
 ### Build artifacts
 
-Hadoop uses maven for project management and changing the artifacts can affect existing user workflows.
+Hadoop uses Maven for project management. Changes to the contents of
+generated artifacts can impact existing user applications.
 
 #### Policy
 
-* Test artifacts: The test jars generated are strictly for internal use and are not expected to be used outside of Hadoop, similar to APIs annotated @Private, @Unstable.
-* Built artifacts: The hadoop-client artifact (maven groupId:artifactId) stays compatible within a major release, while the other artifacts can change in incompatible ways.
+The contents of Hadoop test artifacts SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Unstable](./InterfaceClassification.html#Unstable). Test artifacts include
+all JAR files generated from test source code and all JAR files that include
+"tests" in the file name.
+
+The Hadoop client artifacts SHALL be considered
+[Public](./InterfaceClassification.html#Public) and
+[Stable](./InterfaceClassification.html#Stable). Client artifacts are the
+following:
+
+* hadoop-client
+* hadoop-client-api
+* hadoop-client-minicluster
+* hadoop-client-runtime
+* hadoop-hdfs-client
+* hadoop-hdfs-native-client
+* hadoop-mapreduce-client-app
+* hadoop-mapreduce-client-common
+* hadoop-mapreduce-client-core
+* hadoop-mapreduce-client-hs
+* hadoop-mapreduce-client-hs-plugins
+* hadoop-mapreduce-client-jobclient
+* hadoop-mapreduce-client-nativetask
+* hadoop-mapreduce-client-shuffle
+* hadoop-yarn-client
+
+All other build artifacts SHALL be considered
+[Private](./InterfaceClassification.html#Private) and
+[Unstable](./InterfaceClassification.html#Unstable).
 
 ### Hardware/Software Requirements
 
-To keep up with the latest advances in hardware, operating systems, JVMs, and other software, new Hadoop releases or some of their features might require higher versions of the same. For a specific environment, upgrading Hadoop might require upgrading other dependent software components.
+To keep up with the latest advances in hardware, operating systems, JVMs, and
+other software, new Hadoop releases may include features that require
+newer hardware, operating systems releases, or JVM versions than previous
+Hadoop releases. For a specific environment, upgrading Hadoop might require
+upgrading other dependent software components.
 
 #### Policies
 
 * Hardware
     * Architecture: The community has no plans to restrict Hadoop to specific architectures, but can have family-specific optimizations.
-    * Minimum resources: While there are no guarantees on the minimum resources required by Hadoop daemons, the community attempts to not increase requirements within a minor release.
-* Operating Systems: The community will attempt to maintain the same OS requirements (OS kernel versions) within a minor release. Currently GNU/Linux and Microsoft Windows are the OSes officially supported by the community while Apache Hadoop is known to work reasonably well on other OSes such as Apple MacOSX, Solaris etc.
-* The JVM requirements will not change across point releases within the same minor release except if the JVM version under question becomes unsupported. Minor/major releases might require later versions of JVM for some/all of the supported operating systems.
-* Other software: The community tries to maintain the minimum versions of additional software required by Hadoop. For example, ssh, kerberos etc.
+    * Minimum resources: While there are no guarantees on the minimum resources required by Hadoop daemons, the developer community SHOULD avoid increasing requirements within a minor release.
+* Operating Systems: The community SHOULD maintain the same minimum OS requirements (OS kernel versions) within a minor release. Currently GNU/Linux and Microsoft Windows are the OSes officially supported by the community, while Apache Hadoop is known to work reasonably well on other OSes such as Apple MacOSX, Solaris, etc.
+* The JVM requirements SHALL NOT change across minor releases within the same major release unless the JVM version in question becomes unsupported. The JVM version requirement MAY be different for different operating systems or even operating system releases.
+* File systems supported by Hadoop, e.g. through the HDFS FileSystem API, SHOULD NOT become unsupported between minor releases within a major version unless a migration path to an alternate client implementation is available.
 
 References
 ----------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/InterfaceClassification.md b/hadoop-common-project/hadoop-common/src/site/markdown/InterfaceClassification.md
index c7309ab7714..451f9be3073 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/InterfaceClassification.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/InterfaceClassification.md
@@ -66,54 +66,103 @@ Hadoop uses the following kinds of audience in order of increasing/wider visibil
 
 #### Private
 
-The interface is for internal use within the project (such as HDFS or MapReduce)
-and should not be used by applications or by other projects. It is subject to
-change at anytime without notice. Most interfaces of a project are Private (also
-referred to as project-private).
+A Private interface is for internal use within the project (such as HDFS or
+MapReduce) and should not be used by applications or by other projects. Most
+interfaces of a project are Private (also referred to as project-private).
+Unless an interface is intentionally exposed for external consumption, it should
+be marked Private.
 
 #### Limited-Private
 
-The interface is used by a specified set of projects or systems (typically
-closely related projects). Other projects or systems should not use the
-interface. Changes to the interface will be communicated/negotiated with the
+A Limited-Private interface is used by a specified set of projects or systems
+(typically closely related projects). Other projects or systems should not use
+the interface. Changes to the interface will be communicated/negotiated with the
 specified projects. For example, in the Hadoop project, some interfaces are
 LimitedPrivate{HDFS, MapReduce} in that they are private to the HDFS and
 MapReduce projects.
 
 #### Public
 
-The interface is for general use by any application.
+A Public interface is for general use by any application.
+
+### Change Compatibility
+
+Changes to an API fall into two broad categories: compatible and incompatible.
+A compatible change is a change that meets the following criteria:
+
+* no existing capabilities are removed,
+* no existing capabilities are modified in a way that prevents their use by clients that were constructed to use the interface prior to the change, and
+* no capabilities are added that require changes to clients that were constructed to use the interface prior to the change.
+
+Any change that does not meet these three criteria is an incompatible change.
+Stated simply, a compatible change will not break existing clients. These
+examples are compatible changes:
+
+* adding a method to a Java class,
+* adding an optional parameter to a RESTful web service,
+* adding a tag to an XML document, or
+* making the audience annotation of an interface broader (e.g. from Private to Public) or the change compatibility annotation more restrictive (e.g. from Evolving to Stable).
+
+These examples are incompatible changes (see the sketch after this list):
+
+* removing a method from a Java class,
+* adding a method to a Java interface,
+* adding a required parameter to a RESTful web service,
+* renaming a field in a JSON document, or
+* making the audience annotation of an interface less broad (e.g. from Public to Limited-Private) or the change compatibility annotation less restrictive (e.g. from Evolving to Unstable).
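+
+The following minimal Java sketch (the `Greeter` and `SimpleGreeter` names are
+hypothetical) contrasts two of the cases above: adding a method to a class is
+compatible, while adding a method to an interface forces every existing
+implementation to change.
+
+    // Pre-existing interface and an existing client implementation of it.
+    interface Greeter {
+      String greet(String name);
+      // Adding a new abstract method here would be incompatible: SimpleGreeter
+      // (and every other existing implementation) would no longer compile.
+    }
+
+    class SimpleGreeter implements Greeter {
+      @Override
+      public String greet(String name) {
+        return "Hello, " + name;
+      }
+      // By contrast, adding a method to this class is a compatible change:
+      // existing callers continue to compile and run unchanged.
+    }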
 
 ### Stability
 
-Stability denotes how stable an interface is, as in when incompatible changes to
-the interface are allowed. Hadoop APIs have the following levels of stability.
+Stability denotes how stable an interface is and when compatible and
+incompatible changes to the interface are allowed. Hadoop APIs have the
+following levels of stability.
 
 #### Stable
 
-Can evolve while retaining compatibility for minor release boundaries; in other
-words, incompatible changes to APIs marked as Stable are allowed only at major
-releases (i.e. at m.0).
+A Stable interface is exposed as a preferred means of communication. A Stable
+interface is expected not to change incompatibly within a major release and
+hence serves as a safe development target. A Stable interface may evolve
+compatibly between minor releases.
+
+Incompatible changes allowed: major (X.0.0)
+Compatible changes allowed: minor (x.Y.0)
 
 #### Evolving
 
-Evolving, but incompatible changes are allowed at minor releases (i.e. m .x)
+An Evolving interface is typically exposed so that users or external code can
+make use of a feature before it has stabilized. There is, however, no
+requirement that an Evolving interface eventually stabilize and be promoted to
+Stable.
+
+Incompatible changes are allowed for Evolving interfaces only at minor releases.
+
+Incompatible changes allowed: minor (x.Y.0)
+Compatible changes allowed: maintenance (x.y.Z)
 
 #### Unstable
 
-Incompatible changes to Unstable APIs are allowed at any time. This usually makes
-sense for only private interfaces.
+An Unstable interface is one for which no compatibility guarantees are made.
+An interface labeled Unstable is not necessarily unstable in practice; it is
+typically exposed because a user or external code needs access to an interface
+that is not intended for general consumption. The interface is exposed as
+Unstable to state clearly that, even though it is exposed, it is not the
+preferred access path and no compatibility guarantees are made for it.
 
-However one may call this out for a supposedly public interface to highlight
-that it should not be used as an interface; for public interfaces, labeling it
-as Not-an-interface is probably more appropriate than "Unstable".
+Unless there is a reason to offer a compatibility guarantee on an interface,
+whether it is exposed or not, it should be labeled as Unstable. Private
+interfaces should also be Unstable in most cases.
 
-Examples of publicly visible interfaces that are unstable
-(i.e. not-an-interface): GUI, CLIs whose output format will change.
+Incompatible changes to Unstable interfaces are allowed at any time.
+
+Incompatible changes allowed: maintenance (x.y.Z)
+Compatible changes allowed: maintenance (x.y.Z)
 
 #### Deprecated
 
-APIs that could potentially be removed in the future and should not be used.
+A Deprecated interface could potentially be removed in the future and should
+not be used.  Even so, a Deprecated interface will continue to function until
+it is removed.  When a Deprecated interface can be removed depends on whether
+it is also Stable, Evolving, or Unstable.
 
 How are the Classifications Recorded?
 -------------------------------------
@@ -121,95 +170,101 @@ How are the Classifications Recorded?
 How will the classification be recorded for Hadoop APIs?
 
 * Each interface or class will have the audience and stability recorded using
-  annotations in org.apache.hadoop.classification package.
+  annotations in the org.apache.hadoop.classification package (see the sketch
+  after this list).
 
-* The javadoc generated by the maven target javadoc:javadoc lists only the public API.
+* The javadoc generated by the maven target javadoc:javadoc lists only the
+  public API.
 
 * One can derive the audience of java classes and java interfaces by the
   audience of the package in which they are contained. Hence it is useful to
   declare the audience of each java package as public or private (along with the
   private audience variations).
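+
+A minimal sketch of how these annotations look in practice (the class names
+here are hypothetical; the annotations are those provided by the
+org.apache.hadoop.classification package):
+
+    import org.apache.hadoop.classification.InterfaceAudience;
+    import org.apache.hadoop.classification.InterfaceStability;
+
+    @InterfaceAudience.Public
+    @InterfaceStability.Stable
+    public class ExampleClientApi {
+      // A safe development target for downstream applications.
+    }
+
+    @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
+    @InterfaceStability.Evolving
+    class ExampleInternalHook {
+      // Only the listed projects should depend on this class.
+    }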
 
+How will the classification be recorded for other interfaces, such as CLIs?
+
+* See the [Hadoop Compatibility](Compatibility.html) page for details.
+
 FAQ
 ---
 
 * Why aren’t the java scopes (private, package private and public) good enough?
     * Java’s scoping is not very complete. One is often forced to make a class
-      public in order for other internal components to use it. It does not have
-      friends or sub-package-private like C++.
+      public in order for other internal components to use it. It also does not
+      have friends or sub-package-private like C++.
 
-* But I can easily access a private implementation interface if it is Java public.
-  Where is the protection and control?
-    * The purpose of this is not providing absolute access control. Its purpose
-      is to communicate to users and developers. One can access private
-      implementation functions in libc; however if they change the internal
-      implementation details, your application will break and you will have
-      little sympathy from the folks who are supplying libc. If you use a
-      non-public interface you understand the risks.
+* But I can easily access a Private interface if it is Java public. Where is the
+  protection and control?
+    * The purpose of this classification scheme is not providing absolute
+      access control. Its purpose is to communicate to users and developers.
+      One can access private implementation functions in libc; however if
+      they change the internal implementation details, the application will
+      break and one will receive little sympathy from the folks who are
+      supplying libc. When using a non-public interface, the risks are
+      understood.
 
-* Why bother declaring the stability of a private interface?
-  Aren’t private interfaces always unstable?
-    * Private interfaces are not always unstable. In the cases where they are
-      stable they capture internal properties of the system and can communicate
+* Why bother declaring the stability of a Private interface? Aren’t Private
+  interfaces always Unstable?
+    * Private interfaces are not always Unstable. In the cases where they are
+      Stable they capture internal properties of the system and can communicate
       these properties to its internal users and to developers of the interface.
-        * e.g. In HDFS, NN-DN protocol is private but stable and can help
-          implement rolling upgrades. It communicates that this interface should
-          not be changed in incompatible ways even though it is private.
-        * e.g. In HDFS, FSImage stability provides more flexible rollback.
+        * e.g. In HDFS, NN-DN protocol is Private but Stable and can help
+          implement rolling upgrades. The stability annotation communicates that
+          this interface should not be changed in incompatible ways even though
+          it is Private.
+        * e.g. In HDFS, the Stable designation of the FSImage format provides
+          more flexible rollback.
 
-* What is the harm in applications using a private interface that is stable? How
-  is it different than a public stable interface?
-    * While a private interface marked as stable is targeted to change only at
+* What is the harm in applications using a Private interface that is Stable?
+  How is it different from a Public Stable interface?
+    * While a Private interface marked as Stable is targeted to change only at
       major releases, it may break at other times if the providers of that
-      interface are willing to change the internal users of that
-      interface. Further, a public stable interface is less likely to break even
+      interface also are willing to change the internal consumers of that
+      interface. Further, a Public Stable interface is less likely to break even
       at major releases (even though it is allowed to break compatibility)
-      because the impact of the change is larger. If you use a private interface
+      because the impact of the change is larger. If you use a Private interface
       (regardless of its stability) you run the risk of incompatibility.
 
-* Why bother with Limited-private? Isn’t it giving special treatment to some projects?
-  That is not fair.
-    * First, most interfaces should be public or private; actually let us state
-      it even stronger: make it private unless you really want to expose it to
-      public for general use.
-    * Limited-private is for interfaces that are not intended for general
+* Why bother with Limited-Private? Isn’t it giving special treatment to some
+  projects? That is not fair.
+    * Most interfaces should be Public or Private. An interface should be
+      Private unless it is explicitly intended for general use.
+    * Limited-Private is for interfaces that are not intended for general
       use. They are exposed to related projects that need special hooks. Such a
-      classification has a cost to both the supplier and consumer of the limited
+      classification has a cost to both the supplier and consumer of the
       interface. Both will have to work together if ever there is a need to
       break the interface in the future; for example the supplier and the
       consumers will have to work together to get coordinated releases of their
-      respective projects. This should not be taken lightly – if you can get
-      away with private then do so; if the interface is really for general use
-      for all applications then do so. But remember that making an interface
-      public has huge responsibility. Sometimes Limited-private is just right.
-    * A good example of a limited-private interface is BlockLocations, This is a
-      fairly low-level interface that we are willing to expose to MR and perhaps
-      HBase. We are likely to change it down the road and at that time we will
-      coordinate release effort with the MR team.
-      While MR and HDFS are always released in sync today, they may
-      change down the road.
-    * If you have a limited-private interface with many projects listed then you
-      are fooling yourself. It is practically public.
-    * It might be worth declaring a special audience classification called
-      Hadoop-Private for the Hadoop family.
+      respective projects. This contract should not be taken lightly: use
+      Private if possible; if the interface is really intended for general use
+      by all applications, then use Public. Always remember that making an
+      interface Public comes with a large burden of responsibility. Sometimes
+      Limited-Private is just right.
+    * A good example of a Limited-Private interface is BlockLocations. This
+      interface is a fairly low-level interface that is exposed to MapReduce
+      and HBase. The interface is likely to change down the road, and at that
+      time the release effort will have to be coordinated with the
+      MapReduce development team. While MapReduce and HDFS are always released
+      in sync today, that policy may change down the road.
+    * If you have a Limited-Private interface with many projects listed, then
+      the interface is probably a good candidate to be made Public.
 
-* Lets treat all private interfaces as Hadoop-private. What is the harm in
-  projects in the Hadoop family have access to private classes?
-    * Do we want MR accessing class files that are implementation details inside
-      HDFS. There used to be many such layer violations in the code that we have
-      been cleaning up over the last few years. We don’t want such layer
-      violations to creep back in by no separating between the major components
-      like HDFS and MR.
+* Let's treat all Private interfaces as Limited-Private for all of Hadoop. What
+  is the harm if projects in the Hadoop family have access to private classes?
+    * There used to be many cases in the code where one project depended on the
+      internal implementation details of another. A significant effort went
+      into cleaning up those issues. Opening up all interfaces as
+      Limited-Private for all of Hadoop would open the door to reintroducing
+      such coupling issues.
 
-* Aren't all public interfaces stable?
-    * One may mark a public interface as evolving in its early days. Here one is
+* Aren't all Public interfaces Stable?
+    * One may mark a Public interface as Evolving in its early days. Here one is
       promising to make an effort to make compatible changes but may need to
       break it at minor releases.
-    * One example of a public interface that is unstable is where one is
+    * One example of a Public interface that is Unstable is where one is
       providing an implementation of a standards-body based interface that is
       still under development. For example, many companies, in an attempt to be
       first to market, have provided implementations of a new NFS protocol even
       when the protocol was not fully completed by IETF. The implementor cannot
-      evolve the interface in a fashion that causes least distruption because
+      evolve the interface in a fashion that minimizes disruption because
       the stability is controlled by the standards body. Hence it is appropriate
-      to label the interface as unstable.
+      to label the interface as Unstable.
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
index 1e522c7782c..e67cbe32d42 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
@@ -605,7 +605,7 @@ The result is `FSDataOutputStream`, which through its operations may generate ne
  clients creating files with `overwrite==true` to fail if the file is created
  by another client between the two tests.
 
-* S3N, S3A, Swift and potentially other Object Stores do not currently change the FS state
+* S3A, Swift and potentially other Object Stores do not currently change the FS state
 until the output stream `close()` operation is completed.
 This MAY be a bug, as it allows >1 client to create a file with `overwrite==false`,
  and potentially confuse file/directory logic
@@ -961,7 +961,7 @@ The outcome is no change to FileSystem state, with a return value of false.
 
     FS' = FS; result = False
 
-*Local Filesystem, S3N*
+*Local Filesystem*
 
 The outcome is as a normal rename, with the additional (implicit) feature
 that the parent directories of the destination also exist.
@@ -1262,4 +1262,4 @@ It currently supports to query:
  * `StreamCapabilties.HFLUSH` ("*hflush*"): the capability to flush out the data
  in client's buffer.
  * `StreamCapabilities.HSYNC` ("*hsync*"): capability to flush out the data in
- client's buffer and the disk device.
\ No newline at end of file
+ client's buffer and the disk device.
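+
+A minimal usage sketch of this probe, assuming the output stream implements
+the StreamCapabilities query described here (the `FlushProbe` class and method
+names are illustrative; the capability names are the "hflush" and "hsync"
+strings listed above):
+
+    import java.io.IOException;
+
+    import org.apache.hadoop.fs.FSDataOutputStream;
+    import org.apache.hadoop.fs.FileSystem;
+    import org.apache.hadoop.fs.Path;
+
+    public class FlushProbe {
+      /** Write data, then flush it as durably as the stream supports. */
+      public static void writeDurably(FileSystem fs, Path path, byte[] data)
+          throws IOException {
+        try (FSDataOutputStream out = fs.create(path, true)) {
+          out.write(data);
+          if (out.hasCapability("hsync")) {
+            out.hsync();    // client buffer flushed through to the disk device
+          } else if (out.hasCapability("hflush")) {
+            out.hflush();   // client buffer flushed out to the filesystem
+          }
+        }
+      }
+    }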
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md
index 12a796717df..37191a5b2a6 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md
@@ -29,11 +29,10 @@ return codes of Unix filesystem actions as a reference. Even so, there
 are places where HDFS diverges from the expected behaviour of a POSIX
 filesystem.
 
-The behaviour of other Hadoop filesystems are not as rigorously tested.
-The bundled S3N and S3A FileSystem clients make Amazon's S3 Object Store ("blobstore")
+The bundled S3A FileSystem client makes Amazon's S3 Object Store ("blobstore")
 accessible through the FileSystem API. The Swift FileSystem driver provides similar
-functionality for the OpenStack Swift blobstore. The Azure object storage
-FileSystem talks to Microsoft's Azure equivalent. All of these
+functionality for the OpenStack Swift blobstore. The Azure WASB and ADL object
+storage FileSystems talk to Microsoft's Azure storage. All of these
 bind to object stores, which do have different behaviors, especially regarding
 consistency guarantees, and atomicity of operations.
 
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md
index 6823e0c6a05..4c6fa3ff0f6 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md
@@ -195,21 +195,21 @@ equivalent. Furthermore, the build MUST be configured to never bundle this file
 In addition, `src/test/resources/auth-keys.xml` will need to be created.  It can be a copy of `contract-test-options.xml`.
 The `AbstractFSContract` class automatically loads this resource file if present; specific keys for specific test cases can be added.
 
-As an example, here are what S3N test keys look like:
+As an example, here are what S3A test keys look like:
 
     
       
-        fs.contract.test.fs.s3n
-        s3n://tests3contract
+        fs.contract.test.fs.s3a
+        s3a://tests3contract
       
 
       
-        fs.s3n.awsAccessKeyId
+        fs.s3a.access.key
         DONOTPCOMMITTHISKEYTOSCM
       
 
       
-        fs.s3n.awsSecretAccessKey
+        fs.s3a.secret.key
         DONOTEVERSHARETHISSECRETKEY!
       
     
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java
index 33248864f2e..864c10ce207 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java
@@ -94,14 +94,10 @@ public void initializeMemberVariables() {
     xmlPropsToSkipCompare.add("hadoop.tmp.dir");
     xmlPropsToSkipCompare.add("nfs3.mountd.port");
     xmlPropsToSkipCompare.add("nfs3.server.port");
-    xmlPropsToSkipCompare.add("test.fs.s3n.name");
     xmlPropsToSkipCompare.add("fs.viewfs.rename.strategy");
 
-    // S3N/S3A properties are in a different subtree.
-    // - org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys
+    // S3A properties are in a different subtree.
     xmlPrefixToSkipCompare.add("fs.s3a.");
-    xmlPrefixToSkipCompare.add("fs.s3n.");
-    xmlPrefixToSkipCompare.add("s3native.");
 
     // WASB properties are in a different subtree.
     // - org.apache.hadoop.fs.azure.NativeAzureFileSystem
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java
index 4790f7c6d39..313394293c0 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java
@@ -54,7 +54,6 @@ private void testRedact(Configuration conf) throws Exception {
         "fs.s3a.bucket.BUCKET.secret.key",
         "fs.s3a.server-side-encryption.key",
         "fs.s3a.bucket.engineering.server-side-encryption.key",
-        "fs.s3n.awsSecretKey",
         "fs.azure.account.key.abcdefg.blob.core.windows.net",
         "fs.adl.oauth2.refresh.token",
         "fs.adl.oauth2.credential",
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java
index 2c19722ba89..d14dd59c773 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/kms/TestLoadBalancingKMSClientProvider.java
@@ -23,12 +23,9 @@
 import static org.junit.Assert.fail;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
-import static org.mockito.Mockito.verify;
 
 import java.io.IOException;
-import java.net.NoRouteToHostException;
 import java.net.URI;
-import java.net.UnknownHostException;
 import java.security.GeneralSecurityException;
 import java.security.NoSuchAlgorithmException;
 
@@ -36,9 +33,6 @@
 import org.apache.hadoop.crypto.key.KeyProvider;
 import org.apache.hadoop.crypto.key.KeyProvider.Options;
 import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
-import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
-import org.apache.hadoop.net.ConnectTimeoutException;
-import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.authentication.client.AuthenticationException;
 import org.apache.hadoop.security.authorize.AuthorizationException;
 import org.junit.Test;
@@ -53,17 +47,14 @@ public void testCreation() throws Exception {
     Configuration conf = new Configuration();
     KeyProvider kp = new KMSClientProvider.Factory().createProvider(new URI(
         "kms://http@host1/kms/foo"), conf);
-    assertTrue(kp instanceof LoadBalancingKMSClientProvider);
-    KMSClientProvider[] providers =
-        ((LoadBalancingKMSClientProvider) kp).getProviders();
-    assertEquals(1, providers.length);
-    assertEquals(Sets.newHashSet("http://host1/kms/foo/v1/"),
-        Sets.newHashSet(providers[0].getKMSUrl()));
+    assertTrue(kp instanceof KMSClientProvider);
+    assertEquals("http://host1/kms/foo/v1/",
+        ((KMSClientProvider) kp).getKMSUrl());
 
     kp = new KMSClientProvider.Factory().createProvider(new URI(
         "kms://http@host1;host2;host3/kms/foo"), conf);
     assertTrue(kp instanceof LoadBalancingKMSClientProvider);
-    providers =
+    KMSClientProvider[] providers =
         ((LoadBalancingKMSClientProvider) kp).getProviders();
     assertEquals(3, providers.length);
     assertEquals(Sets.newHashSet("http://host1/kms/foo/v1/",
@@ -131,7 +122,7 @@ public void testLoadBalancingWithFailure() throws Exception {
     // This should be retried
     KMSClientProvider p4 = mock(KMSClientProvider.class);
     when(p4.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p4"));
+        .thenThrow(new IOException("p4"));
     when(p4.getKMSUrl()).thenReturn("p4");
     KeyProvider kp = new LoadBalancingKMSClientProvider(
         new KMSClientProvider[] { p1, p2, p3, p4 }, 0, conf);
@@ -329,298 +320,4 @@ public void testWarmUpEncryptedKeysWhenOneProviderSucceeds()
     Mockito.verify(p1, Mockito.times(1)).warmUpEncryptedKeys(keyName);
     Mockito.verify(p2, Mockito.times(1)).warmUpEncryptedKeys(keyName);
   }
-
-  /**
-   * Tests whether retryPolicy fails immediately on encountering IOException
-   * which is not SocketException.
-   * @throws Exception
-   */
-  @Test
-  public void testClientRetriesWithIOException() throws Exception {
-    Configuration conf = new Configuration();
-    // Setting total failover attempts to .
-    conf.setInt(
-        CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 10);
-    KMSClientProvider p1 = mock(KMSClientProvider.class);
-    when(p1.getMetadata(Mockito.anyString()))
-        .thenThrow(new IOException("p1"));
-    KMSClientProvider p2 = mock(KMSClientProvider.class);
-    when(p2.getMetadata(Mockito.anyString()))
-        .thenThrow(new IOException("p2"));
-    KMSClientProvider p3 = mock(KMSClientProvider.class);
-    when(p3.getMetadata(Mockito.anyString()))
-        .thenThrow(new IOException("p3"));
-
-    when(p1.getKMSUrl()).thenReturn("p1");
-    when(p2.getKMSUrl()).thenReturn("p2");
-    when(p3.getKMSUrl()).thenReturn("p3");
-    LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
-        new KMSClientProvider[] {p1, p2, p3}, 0, conf);
-    try {
-      kp.getMetadata("test3");
-      fail("Should fail since all providers threw an IOException");
-    } catch (Exception e) {
-      assertTrue(e instanceof IOException);
-    }
-    verify(kp.getProviders()[0], Mockito.times(1))
-        .getMetadata(Mockito.eq("test3"));
-    verify(kp.getProviders()[1], Mockito.never())
-        .getMetadata(Mockito.eq("test3"));
-    verify(kp.getProviders()[2], Mockito.never())
-        .getMetadata(Mockito.eq("test3"));
-  }
-
-  /**
-   * Tests that client doesn't retry once it encounters AccessControlException
-   * from first provider.
-   * This assumes all the kms servers are configured with identical access to
-   * keys.
-   * @throws Exception
-   */
-  @Test
-  public void testClientRetriesWithAccessControlException() throws Exception {
-    Configuration conf = new Configuration();
-    conf.setInt(
-        CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3);
-    KMSClientProvider p1 = mock(KMSClientProvider.class);
-    when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new AccessControlException("p1"));
-    KMSClientProvider p2 = mock(KMSClientProvider.class);
-    when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new IOException("p2"));
-    KMSClientProvider p3 = mock(KMSClientProvider.class);
-    when(p3.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new IOException("p3"));
-
-    when(p1.getKMSUrl()).thenReturn("p1");
-    when(p2.getKMSUrl()).thenReturn("p2");
-    when(p3.getKMSUrl()).thenReturn("p3");
-    LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
-        new KMSClientProvider[] {p1, p2, p3}, 0, conf);
-    try {
-      kp.createKey("test3", new Options(conf));
-      fail("Should fail because provider p1 threw an AccessControlException");
-    } catch (Exception e) {
-      assertTrue(e instanceof AccessControlException);
-    }
-    verify(p1, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p2, Mockito.never()).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p3, Mockito.never()).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-  }
-
-  /**
-   * Tests that client doesn't retry once it encounters RunTimeException
-   * from first provider.
-   * This assumes all the kms servers are configured with identical access to
-   * keys.
-   * @throws Exception
-   */
-  @Test
-  public void testClientRetriesWithRuntimeException() throws Exception {
-    Configuration conf = new Configuration();
-    conf.setInt(
-        CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3);
-    KMSClientProvider p1 = mock(KMSClientProvider.class);
-    when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new RuntimeException("p1"));
-    KMSClientProvider p2 = mock(KMSClientProvider.class);
-    when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new IOException("p2"));
-
-    when(p1.getKMSUrl()).thenReturn("p1");
-    when(p2.getKMSUrl()).thenReturn("p2");
-
-    LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
-        new KMSClientProvider[] {p1, p2}, 0, conf);
-    try {
-      kp.createKey("test3", new Options(conf));
-      fail("Should fail since provider p1 threw RuntimeException");
-    } catch (Exception e) {
-      assertTrue(e instanceof RuntimeException);
-    }
-    verify(p1, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p2, Mockito.never()).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-  }
-
-  /**
-   * Tests the client retries until it finds a good provider.
-   * @throws Exception
-   */
-  @Test
-  public void testClientRetriesWithTimeoutsException() throws Exception {
-    Configuration conf = new Configuration();
-    conf.setInt(
-        CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 4);
-    KMSClientProvider p1 = mock(KMSClientProvider.class);
-    when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p1"));
-    KMSClientProvider p2 = mock(KMSClientProvider.class);
-    when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new UnknownHostException("p2"));
-    KMSClientProvider p3 = mock(KMSClientProvider.class);
-    when(p3.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new NoRouteToHostException("p3"));
-    KMSClientProvider p4 = mock(KMSClientProvider.class);
-    when(p4.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenReturn(
-            new KMSClientProvider.KMSKeyVersion("test3", "v1", new byte[0]));
-    when(p1.getKMSUrl()).thenReturn("p1");
-    when(p2.getKMSUrl()).thenReturn("p2");
-    when(p3.getKMSUrl()).thenReturn("p3");
-    when(p4.getKMSUrl()).thenReturn("p4");
-    LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
-        new KMSClientProvider[] {p1, p2, p3, p4}, 0, conf);
-    try {
-      kp.createKey("test3", new Options(conf));
-    } catch (Exception e) {
-      fail("Provider p4 should have answered the request.");
-    }
-    verify(p1, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p2, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p3, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p4, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-  }
-
-  /**
-   * Tests the operation succeeds second time after ConnectTimeoutException.
-   * @throws Exception
-   */
-  @Test
-  public void testClientRetriesSucceedsSecondTime() throws Exception {
-    Configuration conf = new Configuration();
-    conf.setInt(
-        CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3);
-    KMSClientProvider p1 = mock(KMSClientProvider.class);
-    when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p1"))
-        .thenReturn(new KMSClientProvider.KMSKeyVersion("test3", "v1",
-                new byte[0]));
-    KMSClientProvider p2 = mock(KMSClientProvider.class);
-    when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p2"));
-
-    when(p1.getKMSUrl()).thenReturn("p1");
-    when(p2.getKMSUrl()).thenReturn("p2");
-
-    LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
-        new KMSClientProvider[] {p1, p2}, 0, conf);
-    try {
-      kp.createKey("test3", new Options(conf));
-    } catch (Exception e) {
-      fail("Provider p1 should have answered the request second time.");
-    }
-    verify(p1, Mockito.times(2)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p2, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-  }
-
-  /**
-   * Tests whether retryPolicy retries specified number of times.
-   * @throws Exception
-   */
-  @Test
-  public void testClientRetriesSpecifiedNumberOfTimes() throws Exception {
-    Configuration conf = new Configuration();
-    conf.setInt(
-        CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 10);
-    KMSClientProvider p1 = mock(KMSClientProvider.class);
-    when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p1"));
-    KMSClientProvider p2 = mock(KMSClientProvider.class);
-    when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p2"));
-
-    when(p1.getKMSUrl()).thenReturn("p1");
-    when(p2.getKMSUrl()).thenReturn("p2");
-
-    LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
-        new KMSClientProvider[] {p1, p2}, 0, conf);
-    try {
-      kp.createKey("test3", new Options(conf));
-      fail("Should fail");
-    } catch (Exception e) {
-     assert (e instanceof ConnectTimeoutException);
-    }
-    verify(p1, Mockito.times(6)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p2, Mockito.times(5)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-  }
-
-  /**
-   * Tests whether retryPolicy retries number of times equals to number of
-   * providers if conf kms.client.failover.max.attempts is not set.
-   * @throws Exception
-   */
-  @Test
-  public void testClientRetriesIfMaxAttemptsNotSet() throws Exception {
-    Configuration conf = new Configuration();
-    KMSClientProvider p1 = mock(KMSClientProvider.class);
-    when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p1"));
-    KMSClientProvider p2 = mock(KMSClientProvider.class);
-    when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p2"));
-
-    when(p1.getKMSUrl()).thenReturn("p1");
-    when(p2.getKMSUrl()).thenReturn("p2");
-
-    LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
-        new KMSClientProvider[] {p1, p2}, 0, conf);
-    try {
-      kp.createKey("test3", new Options(conf));
-      fail("Should fail");
-    } catch (Exception e) {
-     assert (e instanceof ConnectTimeoutException);
-    }
-    verify(p1, Mockito.times(2)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p2, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-  }
-
-  /**
-   * Tests that client doesn't retry once it encounters AuthenticationException
-   * wrapped in an IOException from first provider.
-   * @throws Exception
-   */
-  @Test
-  public void testClientRetriesWithAuthenticationExceptionWrappedinIOException()
-      throws Exception {
-    Configuration conf = new Configuration();
-    conf.setInt(
-        CommonConfigurationKeysPublic.KMS_CLIENT_FAILOVER_MAX_RETRIES_KEY, 3);
-    KMSClientProvider p1 = mock(KMSClientProvider.class);
-    when(p1.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new IOException(new AuthenticationException("p1")));
-    KMSClientProvider p2 = mock(KMSClientProvider.class);
-    when(p2.createKey(Mockito.anyString(), Mockito.any(Options.class)))
-        .thenThrow(new ConnectTimeoutException("p2"));
-
-    when(p1.getKMSUrl()).thenReturn("p1");
-    when(p2.getKMSUrl()).thenReturn("p2");
-
-    LoadBalancingKMSClientProvider kp = new LoadBalancingKMSClientProvider(
-        new KMSClientProvider[] {p1, p2}, 0, conf);
-    try {
-      kp.createKey("test3", new Options(conf));
-      fail("Should fail since provider p1 threw AuthenticationException");
-    } catch (Exception e) {
-      assertTrue(e.getCause() instanceof AuthenticationException);
-    }
-    verify(p1, Mockito.times(1)).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-    verify(p2, Mockito.never()).createKey(Mockito.eq("test3"),
-            Mockito.any(Options.class));
-  }
-}
\ No newline at end of file
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java
index 9d8cd64ca4b..a4ccee3f7f5 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java
@@ -61,7 +61,16 @@ public abstract class FileSystemContractBaseTest {
   protected byte[] data = dataset(getBlockSize() * 2, 0, 255);
 
   @Rule
-  public Timeout globalTimeout = new Timeout(30000);
+  public Timeout globalTimeout = new Timeout(getGlobalTimeout());
+
+  /**
+   * Get the timeout in milliseconds for each test case.
+   * @return a time in milliseconds.
+   */
+  protected int getGlobalTimeout() {
+    return 30 * 1000;
+  }
+
   @Rule
   public ExpectedException thrown = ExpectedException.none();
 
@@ -246,39 +255,18 @@ public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception {
 
   @Test
   public void testMkdirsWithUmask() throws Exception {
-    if (!isS3(fs)) {
-      Configuration conf = fs.getConf();
-      String oldUmask = conf.get(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY);
-      try {
-        conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK);
-        final Path dir = path("newDir");
-        assertTrue(fs.mkdirs(dir, new FsPermission((short) 0777)));
-        FileStatus status = fs.getFileStatus(dir);
-        assertTrue(status.isDirectory());
-        assertEquals((short) 0715, status.getPermission().toShort());
-      } finally {
-        conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, oldUmask);
-      }
-    }
-  }
-
-  /**
-   * Skip permission tests for S3FileSystem until HDFS-1333 is fixed.
-   * Classes that do not implement {@link FileSystem#getScheme()} method
-   * (e.g {@link RawLocalFileSystem}) will throw an
-   * {@link UnsupportedOperationException}.
-   * @param fileSystem FileSystem object to determine if it is S3 or not
-   * @return true if S3 false in any other case
-   */
-  private boolean isS3(FileSystem fileSystem) {
+    Configuration conf = fs.getConf();
+    String oldUmask = conf.get(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY);
     try {
-      if (fileSystem.getScheme().equals("s3n")) {
-        return true;
-      }
-    } catch (UnsupportedOperationException e) {
-      LOG.warn("Unable to determine the schema of filesystem.");
+      conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK);
+      final Path dir = path("newDir");
+      assertTrue(fs.mkdirs(dir, new FsPermission((short) 0777)));
+      FileStatus status = fs.getFileStatus(dir);
+      assertTrue(status.isDirectory());
+      assertEquals((short) 0715, status.getPermission().toShort());
+    } finally {
+      conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, oldUmask);
     }
-    return false;
   }
 
   @Test
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java
index f9b16f47949..ccf188f1202 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java
@@ -122,7 +122,7 @@ public void testOpenFileTwice() throws Throwable {
     Path path = path("testopenfiletwice.txt");
     byte[] block = dataset(TEST_FILE_LEN, 0, 255);
     //this file now has a simple rule: offset => value
-    createFile(getFileSystem(), path, false, block);
+    createFile(getFileSystem(), path, true, block);
     //open first
     FSDataInputStream instream1 = getFileSystem().open(path);
     FSDataInputStream instream2 = null;
@@ -150,7 +150,7 @@ public void testSequentialRead() throws Throwable {
     int base = 0x40; // 64
     byte[] block = dataset(len, base, base + len);
     //this file now has a simple rule: offset => (value | 0x40)
-    createFile(getFileSystem(), path, false, block);
+    createFile(getFileSystem(), path, true, block);
     //open first
     instream = getFileSystem().open(path);
     assertEquals(base, instream.read());
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java
index 3e716820ba0..7af3cb0a525 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractSeekTest.java
@@ -341,7 +341,7 @@ public void testRandomSeeks() throws Throwable {
     int filesize = 10 * 1024;
     byte[] buf = dataset(filesize, 0, 255);
     Path randomSeekFile = path("testrandomseeks.bin");
-    createFile(getFileSystem(), randomSeekFile, false, buf);
+    createFile(getFileSystem(), randomSeekFile, true, buf);
     Random r = new Random();
 
     // Record the sequence of seeks and reads which trigger a failure.
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/permission/TestFsPermission.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/permission/TestFsPermission.java
index a22985de505..afddf80a25c 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/permission/TestFsPermission.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/permission/TestFsPermission.java
@@ -138,6 +138,63 @@ public void testFsPermission() {
     }
   }
 
+  @Test
+  public void testFsSymbolicConstructorWithNormalInput() {
+
+    // Test cases for the symbolic-representation constructor.
+
+    // Both toOctal() and toShort() are asserted for each mode string; cases
+    // with the sticky bit appear further below.
+
+    assertEquals(777, new FsPermission("+rwx").toOctal());
+    assertEquals(0777, new FsPermission("+rwx").toShort());
+
+    assertEquals(444, new FsPermission("+r").toOctal());
+    assertEquals(0444, new FsPermission("+r").toShort());
+
+    assertEquals(222, new FsPermission("+w").toOctal());
+    assertEquals(0222, new FsPermission("+w").toShort());
+
+    assertEquals(111, new FsPermission("+x").toOctal());
+    assertEquals(0111, new FsPermission("+x").toShort());
+
+    assertEquals(666, new FsPermission("+rw").toOctal());
+    assertEquals(0666, new FsPermission("+rw").toShort());
+
+    assertEquals(333, new FsPermission("+wx").toOctal());
+    assertEquals(0333, new FsPermission("+wx").toShort());
+
+    assertEquals(555, new FsPermission("+rx").toOctal());
+    assertEquals(0555, new FsPermission("+rx").toShort());
+
+
+    // Test cases with repeated symbols in the mode string.
+    // Repeated symbols are ignored as duplicates.
+
+    assertEquals(666, new FsPermission("+rwr").toOctal());
+    assertEquals(0666, new FsPermission("+rwr").toShort());
+
+    assertEquals(000, new FsPermission("-rwr").toOctal());
+    assertEquals(0000, new FsPermission("-rwr").toShort());
+
+    assertEquals(1666, new FsPermission("+rwrt").toOctal());
+    assertEquals(01666, new FsPermission("+rwrt").toShort());
+
+    assertEquals(000, new FsPermission("-rwrt").toOctal());
+    assertEquals(0000, new FsPermission("-rwrt").toShort());
+
+    assertEquals(1777, new FsPermission("+rwxt").toOctal());
+    assertEquals(01777, new FsPermission("+rwxt").toShort());
+
+
+    assertEquals(000, new FsPermission("-rt").toOctal());
+    assertEquals(0000, new FsPermission("-rt").toShort());
+
+    assertEquals(000, new FsPermission("-rwx").toOctal());
+    assertEquals(0000, new FsPermission("-rwx").toShort());
+
+  }
+
+
   @Test
   public void testSymbolicPermission() {
     for (int i = 0; i < SYMBOLIC.length; ++i) {
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java
index e8eedd949e0..af48cb6169b 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java
@@ -34,16 +34,16 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.test.GenericTestUtils;
 
-import org.apache.sshd.SshServer;
+import org.apache.sshd.server.SshServer;
 import org.apache.sshd.common.NamedFactory;
 import org.apache.sshd.server.Command;
-import org.apache.sshd.server.PasswordAuthenticator;
-import org.apache.sshd.server.UserAuth;
-import org.apache.sshd.server.auth.UserAuthPassword;
+import org.apache.sshd.server.auth.password.PasswordAuthenticator;
+import org.apache.sshd.server.auth.UserAuth;
+import org.apache.sshd.server.auth.password.UserAuthPasswordFactory;
 import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider;
 import org.apache.sshd.server.session.ServerSession;
-import org.apache.sshd.server.sftp.SftpSubsystem;
 
+import org.apache.sshd.server.subsystem.sftp.SftpSubsystemFactory;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Rule;
@@ -76,7 +76,7 @@ private static void startSshdServer() throws IOException {
 
     List> userAuthFactories =
         new ArrayList>();
-    userAuthFactories.add(new UserAuthPassword.Factory());
+    userAuthFactories.add(new UserAuthPasswordFactory());
 
     sshd.setUserAuthFactories(userAuthFactories);
 
@@ -92,7 +92,7 @@ public boolean authenticate(String username, String password,
     });
 
     sshd.setSubsystemFactories(
-        Arrays.>asList(new SftpSubsystem.Factory()));
+        Arrays.>asList(new SftpSubsystemFactory()));
 
     sshd.start();
     port = sshd.getPort();
@@ -140,7 +140,7 @@ public static void tearDown() {
     if (sshd != null) {
       try {
         sshd.stop(true);
-      } catch (InterruptedException e) {
+      } catch (IOException e) {
         // ignore
       }
     }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java
index 00cfa44f310..3ea9ab8fce5 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/LambdaTestUtils.java
@@ -70,7 +70,7 @@ public interface TimeoutHandler {
      * @throws Exception if the handler wishes to raise an exception
      * that way.
      */
-    Exception evaluate(int timeoutMillis, Exception caught) throws Exception;
+    Throwable evaluate(int timeoutMillis, Throwable caught) throws Throwable;
   }
 
   /**
@@ -116,7 +116,7 @@ public static int await(int timeoutMillis,
     Preconditions.checkNotNull(timeoutHandler);
 
     long endTime = Time.now() + timeoutMillis;
-    Exception ex = null;
+    Throwable ex = null;
     boolean running = true;
     int iterations = 0;
     while (running) {
@@ -128,9 +128,11 @@ public static int await(int timeoutMillis,
         // the probe failed but did not raise an exception. Reset any
         // exception raised by a previous probe failure.
         ex = null;
-      } catch (InterruptedException | FailFastException e) {
+      } catch (InterruptedException
+          | FailFastException
+          | VirtualMachineError e) {
         throw e;
-      } catch (Exception e) {
+      } catch (Throwable e) {
         LOG.debug("eventually() iteration {}", iterations, e);
         ex = e;
       }
@@ -145,15 +147,20 @@ public static int await(int timeoutMillis,
       }
     }
     // timeout
-    Exception evaluate = timeoutHandler.evaluate(timeoutMillis, ex);
-    if (evaluate == null) {
-      // bad timeout handler logic; fall back to GenerateTimeout so the
-      // underlying problem isn't lost.
-      LOG.error("timeout handler {} did not throw an exception ",
-          timeoutHandler);
-      evaluate = new GenerateTimeout().evaluate(timeoutMillis, ex);
+    Throwable evaluate;
+    try {
+      evaluate = timeoutHandler.evaluate(timeoutMillis, ex);
+      if (evaluate == null) {
+        // bad timeout handler logic; fall back to GenerateTimeout so the
+        // underlying problem isn't lost.
+        LOG.error("timeout handler {} did not throw an exception ",
+            timeoutHandler);
+        evaluate = new GenerateTimeout().evaluate(timeoutMillis, ex);
+      }
+    } catch (Throwable throwable) {
+      evaluate = throwable;
     }
-    throw evaluate;
+    return raise(evaluate);
   }
 
   /**
@@ -217,6 +224,7 @@ public static int await(int timeoutMillis,
    * @throws Exception the last exception thrown before timeout was triggered
    * @throws FailFastException if raised -without any retry attempt.
    * @throws InterruptedException if interrupted during the sleep operation.
+   * @throws OutOfMemoryError if the JVM has run out of memory.
    */
   public static  T eventually(int timeoutMillis,
       Callable eval,
@@ -224,7 +232,7 @@ public static  T eventually(int timeoutMillis,
     Preconditions.checkArgument(timeoutMillis >= 0,
         "timeoutMillis must be >= 0");
     long endTime = Time.now() + timeoutMillis;
-    Exception ex;
+    Throwable ex;
     boolean running;
     int sleeptime;
     int iterations = 0;
@@ -232,10 +240,12 @@ public static  T eventually(int timeoutMillis,
       iterations++;
       try {
         return eval.call();
-      } catch (InterruptedException | FailFastException e) {
+      } catch (InterruptedException
+          | FailFastException
+          | VirtualMachineError e) {
         // these exceptions trigger an immediate exit
         throw e;
-      } catch (Exception e) {
+      } catch (Throwable e) {
         LOG.debug("evaluate() iteration {}", iterations, e);
         ex = e;
       }
@@ -245,7 +255,26 @@ public static  T eventually(int timeoutMillis,
       }
     } while (running);
     // timeout. Throw the last exception raised
-    throw ex;
+    return raise(ex);
+  }
+
+  /**
+   * Take the throwable and raise it as an exception or an error, depending
+   * upon its type. This allows callers to declare that they only throw
+   * Exception (i.e. can be invoked by Callable) yet still rethrow a
+   * previously caught Throwable.
+   * @param throwable Throwable to rethrow
+   * @param  expected return type
+   * @return never
+   * @throws Exception if throwable is an Exception
+   * @throws Error if throwable is not an Exception
+   */
+  private static <T> T raise(Throwable throwable) throws Exception {
+    if (throwable instanceof Exception) {
+      throw (Exception) throwable;
+    } else {
+      throw (Error) throwable;
+    }
   }
 
   /**
@@ -365,6 +394,7 @@ public static  E intercept(
    * @throws Exception any other exception raised
    * @throws AssertionError if the evaluation call didn't raise an exception.
    */
+  @SuppressWarnings("unchecked")
   public static <E extends Throwable> E intercept(
       Class<E> clazz,
       VoidCallable eval)
@@ -487,14 +517,14 @@ public GenerateTimeout() {
      * @return TimeoutException
      */
     @Override
-    public Exception evaluate(int timeoutMillis, Exception caught)
-        throws Exception {
+    public Throwable evaluate(int timeoutMillis, Throwable caught)
+        throws Throwable {
       String s = String.format("%s: after %d millis", message,
           timeoutMillis);
       String caughtText = caught != null
           ? ("; " + robustToString(caught)) : "";
 
-      return (TimeoutException) (new TimeoutException(s + caughtText)
+      return (new TimeoutException(s + caughtText)
                                      .initCause(caught));
     }
   }
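
For illustration only, not part of the patch: with the Throwable-based handling above, a probe that fails a JUnit assertion is retried until the timeout, while VirtualMachineError instances such as OutOfMemoryError propagate immediately. The pollStatus() probe below is a hypothetical stand-in; FixedRetryInterval is the existing retry policy class in LambdaTestUtils.

    String status = LambdaTestUtils.eventually(30_000,
        () -> {
          String s = pollStatus();             // hypothetical probe
          Assert.assertEquals("READY", s);     // AssertionError => retried
          return s;
        },
        new LambdaTestUtils.FixedRetryInterval(500));
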
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java
index d3d5cb4fde3..c790a180ede 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestLambdaTestUtils.java
@@ -25,6 +25,7 @@
 import java.io.IOException;
 import java.util.concurrent.Callable;
 import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.apache.hadoop.test.LambdaTestUtils.*;
 import static org.apache.hadoop.test.GenericTestUtils.*;
@@ -123,6 +124,27 @@ protected void assertMinRetryCount(int minCount) {
         minCount <= retry.getInvocationCount());
   }
 
+  /**
+   * Raise an exception.
+   * @param e exception to raise
+   * @return never
+   * @throws Exception passed in exception
+   */
+  private boolean r(Exception e) throws Exception {
+    throw e;
+  }
+
+  /**
+   * Raise an error.
+   * @param e error to raise
+   * @return never
+   * @throws Exception never
+   * @throws Error the passed in error
+   */
+  private boolean r(Error e) throws Exception {
+    throw e;
+  }
+
   @Test
   public void testAwaitAlwaysTrue() throws Throwable {
     await(TIMEOUT,
@@ -140,7 +162,7 @@ public void testAwaitAlwaysFalse() throws Throwable {
           TIMEOUT_FAILURE_HANDLER);
       fail("should not have got here");
     } catch (TimeoutException e) {
-      assertTrue(retry.getInvocationCount() > 4);
+      assertMinRetryCount(1);
     }
   }
 
@@ -316,9 +338,7 @@ public void testInterceptAwaitLambdaException() throws Throwable {
     IOException ioe = intercept(IOException.class,
         () -> await(
             TIMEOUT,
-            () -> {
-              throw new IOException("inner " + ++count);
-            },
+            () -> r(new IOException("inner " + ++count)),
             retry,
             (timeout, ex) -> ex));
     assertRetryCount(count - 1);
@@ -339,9 +359,7 @@ public void testInterceptAwaitLambdaDiagnostics() throws Throwable {
   public void testInterceptAwaitFailFastLambda() throws Throwable {
     intercept(FailFastException.class,
         () -> await(TIMEOUT,
-            () -> {
-              throw new FailFastException("ffe");
-            },
+            () -> r(new FailFastException("ffe")),
             retry,
             (timeout, ex) -> ex));
     assertRetryCount(0);
@@ -361,14 +379,13 @@ public void testEventuallyLambda() throws Throwable {
     assertRetryCount(0);
   }
 
+
   @Test
   public void testInterceptEventuallyLambdaFailures() throws Throwable {
     intercept(IOException.class,
         "oops",
         () -> eventually(TIMEOUT,
-            () -> {
-              throw new IOException("oops");
-            },
+            () -> r(new IOException("oops")),
             retry));
     assertMinRetryCount(1);
   }
@@ -385,11 +402,95 @@ public void testInterceptEventuallyLambdaFailFast() throws Throwable {
     intercept(FailFastException.class, "oops",
         () -> eventually(
             TIMEOUT,
-            () -> {
-              throw new FailFastException("oops");
-            },
+            () -> r(new FailFastException("oops")),
             retry));
     assertRetryCount(0);
   }
 
+  /**
+   * Verify that assertions trigger catch and retry.
+   * @throws Throwable if the code is broken
+   */
+  @Test
+  public void testEventuallySpinsOnAssertions() throws Throwable {
+    AtomicInteger counter = new AtomicInteger(0);
+    eventually(TIMEOUT,
+        () -> {
+          while (counter.incrementAndGet() < 5) {
+            fail("if you see this, we are in trouble");
+          }
+        },
+        retry);
+    assertMinRetryCount(4);
+  }
+
+  /**
+   * Verify that VirtualMachineError errors are immediately rethrown.
+   * @throws Throwable if the code is broken
+   */
+  @Test
+  public void testInterceptEventuallyThrowsVMErrors() throws Throwable {
+    intercept(OutOfMemoryError.class, "OOM",
+        () -> eventually(
+            TIMEOUT,
+            () -> r(new OutOfMemoryError("OOM")),
+            retry));
+    assertRetryCount(0);
+  }
+
+  /**
+   * Verify that you can declare that an intercept will intercept Errors.
+   * @throws Throwable if the code is broken
+   */
+  @Test
+  public void testInterceptHandlesErrors() throws Throwable {
+    intercept(OutOfMemoryError.class, "OOM",
+        () -> r(new OutOfMemoryError("OOM")));
+  }
+
+  /**
+   * Verify that if an Error raised is not the one being intercepted,
+   * it gets rethrown.
+   * @throws Throwable if the code is broken
+   */
+  @Test
+  public void testInterceptRethrowsVMErrors() throws Throwable {
+    intercept(StackOverflowError.class, "",
+        () -> intercept(OutOfMemoryError.class, "",
+            () -> r(new StackOverflowError())));
+  }
+
+  @Test
+  public void testAwaitHandlesAssertions() throws Throwable {
+    // await a state which is never reached, expect a timeout exception
+    // with the text "failure" in it
+    TimeoutException ex = intercept(TimeoutException.class,
+        "failure",
+        () -> await(TIMEOUT,
+            () -> r(new AssertionError("failure")),
+            retry,
+            TIMEOUT_FAILURE_HANDLER));
+
+    // the retry handler must have been invoked
+    assertMinRetryCount(1);
+    // and the nested cause is the raised assertion
+    if (!(ex.getCause() instanceof AssertionError)) {
+      throw ex;
+    }
+  }
+
+  @Test
+  public void testAwaitRethrowsVMErrors() throws Throwable {
+    // the probe raises a StackOverflowError, which must be rethrown
+    // immediately rather than converted into a timeout
+    intercept(StackOverflowError.class,
+        () -> await(TIMEOUT,
+            () -> r(new StackOverflowError()),
+            retry,
+            TIMEOUT_FAILURE_HANDLER));
+
+    // the retry handler must not have been invoked
+    assertMinRetryCount(0);
+  }
+
 }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCpuTimeTracker.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCpuTimeTracker.java
new file mode 100644
index 00000000000..6246672f0eb
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCpuTimeTracker.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.junit.Test;
+import java.math.BigInteger;
+import static org.junit.Assert.assertTrue;
+
+public class TestCpuTimeTracker {
+  @Test
+  public void test() throws InterruptedException {
+    CpuTimeTracker tracker = new CpuTimeTracker(10);
+    tracker.updateElapsedJiffies(
+        BigInteger.valueOf(100),
+        System.currentTimeMillis());
+    float val1 = tracker.getCpuTrackerUsagePercent();
+    assertTrue(
+        "CPU usage should be unavailable (-1) before the second sample",
+        val1 == -1.0);
+    Thread.sleep(1000);
+    tracker.updateElapsedJiffies(
+        BigInteger.valueOf(200),
+        System.currentTimeMillis());
+    float val2 = tracker.getCpuTrackerUsagePercent();
+    assertTrue(
+        "CPU usage should be positive after the second sample",
+        val2 > 0);
+    Thread.sleep(1000);
+    tracker.updateElapsedJiffies(
+        BigInteger.valueOf(0),
+        System.currentTimeMillis());
+    float val3 = tracker.getCpuTrackerUsagePercent();
+    assertTrue(
+        "CPU usage should be zero when the reported jiffies do not increase",
+        val3 == 0.0);
+  }
+}
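
A rough usage sketch, not part of the patch (the 10ms jiffy length and the readTotalJiffies() source are assumptions): the tracker needs two samples before it can report a figure, so the first call returns the -1 sentinel checked in the test above.

    CpuTimeTracker tracker = new CpuTimeTracker(10);
    tracker.updateElapsedJiffies(readTotalJiffies(), System.currentTimeMillis());
    float first = tracker.getCpuTrackerUsagePercent();   // -1: only one sample
    // ... more CPU time is consumed ...
    tracker.updateElapsedJiffies(readTotalJiffies(), System.currentTimeMillis());
    float percent = tracker.getCpuTrackerUsagePercent(); // usage since last sample
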
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml b/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml
index d85472cd402..d9144ebb1a9 100644
--- a/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml
+++ b/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml
@@ -45,12 +45,6 @@
    This is required by FTPFileSystem
 
 
-
-  test.fs.s3n.name
-  s3n:///
-  The name of the s3n file system for testing.
-
-
 
 
   hadoop.security.authentication
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties b/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties
deleted file mode 100644
index 09cc46396ab..00000000000
--- a/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties
+++ /dev/null
@@ -1,16 +0,0 @@
-#   Licensed under the Apache License, Version 2.0 (the "License");
-#   you may not use this file except in compliance with the License.
-#   You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#   Unless required by applicable law or agreed to in writing, software
-#   distributed under the License is distributed on an "AS IS" BASIS,
-#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#   See the License for the specific language governing permissions and
-#   limitations under the License.
-
-# Speed up the s3native jets3t test
-
-s3service.max-thread-count=10
-threaded-service.max-thread-count=10
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
index cd870ca5aee..01381953457 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
@@ -115,6 +115,14 @@ public boolean delete(Path f, boolean recursive)
     return dfs.delete(getUriPath(f), recursive);
   }
 
+  /**
+   * The returned BlockLocation will have different formats for replicated
+   * and erasure coded files.
+   *
+   * Please refer to
+   * {@link FileContext#getFileBlockLocations(Path, long, long)}
+   * for more details.
+   */
   @Override
   public BlockLocation[] getFileBlockLocations(Path p, long start, long len)
       throws IOException, UnresolvedLinkException {
@@ -165,6 +173,13 @@ public FsServerDefaults getServerDefaults(final Path f) throws IOException {
     return dfs.getServerDefaults();
   }
 
+  /**
+   * The BlockLocation of the returned LocatedFileStatus will have different
+   * formats for replicated and erasure coded files.
+   * Please refer to
+   * {@link FileContext#getFileBlockLocations(Path, long, long)} for
+   * more details.
+   */
   @Override
   public RemoteIterator<LocatedFileStatus> listLocatedStatus(
       final Path p)
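
A minimal sketch of the behaviour the added javadoc points at (the path and bare Configuration are placeholders, not part of the patch): callers obtain the locations through FileContext, and the shape of each BlockLocation depends on whether the file is replicated or erasure coded.

    FileContext fc = FileContext.getFileContext(new Configuration());
    BlockLocation[] locations =
        fc.getFileBlockLocations(new Path("/data/example"), 0, 1024);
    for (BlockLocation loc : locations) {
      // replicated file: one entry per block; erasure coded file: one entry
      // per block group, covering all of its storage locations
      System.out.println(loc);
    }
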
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index 9239df39c59..772049d35d7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -72,6 +72,7 @@
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.FileEncryptionInfo;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsServerDefaults;
 import org.apache.hadoop.fs.FsStatus;
@@ -866,6 +867,10 @@ boolean recoverLease(String src) throws IOException {
    * data-placement when performing operations.  For example, the
    * MapReduce system tries to schedule tasks on the same machines
    * as the data-block the task processes.
+   *
+   * Please refer to
+   * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+   * for more details.
    */
   public BlockLocation[] getBlockLocations(String src, long start,
       long length) throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
index 7f053380f8c..44db3a68245 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java
@@ -260,6 +260,7 @@ private void flipDataBuffers() {
 
   private final Coordinator coordinator;
   private final CellBuffers cellBuffers;
+  private final ErasureCodingPolicy ecPolicy;
   private final RawErasureEncoder encoder;
   private final List<StripedDataStreamer> streamers;
   private final DFSPacket[] currentPackets; // current Packet of each streamer
@@ -286,7 +287,7 @@ private void flipDataBuffers() {
       LOG.debug("Creating DFSStripedOutputStream for " + src);
     }
 
-    final ErasureCodingPolicy ecPolicy = stat.getErasureCodingPolicy();
+    ecPolicy = stat.getErasureCodingPolicy();
     final int numParityBlocks = ecPolicy.getNumParityUnits();
     cellSize = ecPolicy.getCellSize();
     numDataBlocks = ecPolicy.getNumDataUnits();
@@ -478,11 +479,6 @@ private void allocateNewBlock() throws IOException {
     final LocatedBlock lb = addBlock(excludedNodes, dfsClient, src,
         currentBlockGroup, fileId, favoredNodes, getAddBlockFlags());
     assert lb.isStriped();
-    if (lb.getLocations().length < numDataBlocks) {
-      throw new IOException("Failed to get " + numDataBlocks
-          + " nodes from namenode: blockGroupSize= " + numAllBlocks
-          + ", blocks.length= " + lb.getLocations().length);
-    }
     // assign the new block to the current block group
     currentBlockGroup = lb.getBlock();
     blockGroupIndex++;
@@ -494,11 +490,16 @@ private void allocateNewBlock() throws IOException {
       StripedDataStreamer si = getStripedDataStreamer(i);
       assert si.isHealthy();
       if (blocks[i] == null) {
+        // allocBlock() should guarantee that all data blocks are successfully
+        // allocated.
+        assert i >= numDataBlocks;
         // Set exception and close streamer as there is no block locations
         // found for the parity block.
-        LOG.warn("Failed to get block location for parity block, index=" + i);
+        LOG.warn("Cannot allocate parity block(index={}, policy={}). " +
+            "Not enough datanodes? Exclude nodes={}", i,  ecPolicy.getName(),
+            excludedNodes);
         si.getLastException().set(
-            new IOException("Failed to get following block, i=" + i));
+            new IOException("Failed to get parity block, index=" + i));
         si.getErrorState().setInternalError();
         si.close(true);
       } else {
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
index 44caed60d60..f6331cf90d0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
@@ -240,6 +240,13 @@ public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
     return getFileBlockLocations(file.getPath(), start, len);
   }
 
+  /**
+   * The returned BlockLocation will have different formats for replicated
+   * and erasure coded files.
+   * Please refer to
+   * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+   * for more details.
+   */
   @Override
   public BlockLocation[] getFileBlockLocations(Path p,
       final long start, final long len) throws IOException {
@@ -1040,6 +1047,13 @@ public FileStatus[] next(final FileSystem fs, final Path p)
     }.resolve(this, absF);
   }
 
+  /**
+   * The BlockLocation of the returned LocatedFileStatus will have different
+   * formats for replicated and erasure coded files.
+   * Please refer to
+   * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} for
+   * more details.
+   */
   @Override
   protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path p,
       final PathFilter filter)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/BlocksStats.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/BlocksStats.java
deleted file mode 100644
index 7eb30ca7f49..00000000000
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/BlocksStats.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hdfs.protocol;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-
-/**
- * Get statistics pertaining to blocks of type {@link BlockType#CONTIGUOUS}
- * in the filesystem.
- * 

- * @see ClientProtocol#getBlocksStats() - */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public final class BlocksStats { - private final long lowRedundancyBlocksStat; - private final long corruptBlocksStat; - private final long missingBlocksStat; - private final long missingReplicationOneBlocksStat; - private final long bytesInFutureBlocksStat; - private final long pendingDeletionBlocksStat; - - public BlocksStats(long lowRedundancyBlocksStat, - long corruptBlocksStat, long missingBlocksStat, - long missingReplicationOneBlocksStat, long bytesInFutureBlocksStat, - long pendingDeletionBlocksStat) { - this.lowRedundancyBlocksStat = lowRedundancyBlocksStat; - this.corruptBlocksStat = corruptBlocksStat; - this.missingBlocksStat = missingBlocksStat; - this.missingReplicationOneBlocksStat = missingReplicationOneBlocksStat; - this.bytesInFutureBlocksStat = bytesInFutureBlocksStat; - this.pendingDeletionBlocksStat = pendingDeletionBlocksStat; - } - - public long getLowRedundancyBlocksStat() { - return lowRedundancyBlocksStat; - } - - public long getCorruptBlocksStat() { - return corruptBlocksStat; - } - - public long getMissingReplicaBlocksStat() { - return missingBlocksStat; - } - - public long getMissingReplicationOneBlocksStat() { - return missingReplicationOneBlocksStat; - } - - public long getBytesInFutureBlocksStat() { - return bytesInFutureBlocksStat; - } - - public long getPendingDeletionBlocksStat() { - return pendingDeletionBlocksStat; - } - - @Override - public String toString() { - StringBuilder statsBuilder = new StringBuilder(); - statsBuilder.append("ReplicatedBlocksStats=[") - .append("LowRedundancyBlocks=").append(getLowRedundancyBlocksStat()) - .append(", CorruptBlocks=").append(getCorruptBlocksStat()) - .append(", MissingReplicaBlocks=").append(getMissingReplicaBlocksStat()) - .append(", MissingReplicationOneBlocks=").append( - getMissingReplicationOneBlocksStat()) - .append(", BytesInFutureBlocks=").append(getBytesInFutureBlocksStat()) - .append(", PendingDeletionBlocks=").append( - getPendingDeletionBlocksStat()) - .append("]"); - return statsBuilder.toString(); - } -} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index b550467dd89..8d5503f9abb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -778,14 +778,14 @@ SnapshottableDirectoryStatus[] getSnapshottableDirListing() * in the filesystem. */ @Idempotent - BlocksStats getBlocksStats() throws IOException; + ReplicatedBlockStats getReplicatedBlockStats() throws IOException; /** * Get statistics pertaining to blocks of type {@link BlockType#STRIPED} * in the filesystem. */ @Idempotent - ECBlockGroupsStats getECBlockGroupsStats() throws IOException; + ECBlockGroupStats getECBlockGroupStats() throws IOException; /** * Get a report on the system's current datanodes. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java new file mode 100644 index 00000000000..9a8ad8cdb13 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocol; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Get statistics pertaining to blocks of type {@link BlockType#STRIPED} + * in the filesystem. + *

+ * @see ClientProtocol#getECBlockGroupStats() + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class ECBlockGroupStats { + private final long lowRedundancyBlockGroups; + private final long corruptBlockGroups; + private final long missingBlockGroups; + private final long bytesInFutureBlockGroups; + private final long pendingDeletionBlocks; + + public ECBlockGroupStats(long lowRedundancyBlockGroups, + long corruptBlockGroups, long missingBlockGroups, + long bytesInFutureBlockGroups, long pendingDeletionBlocks) { + this.lowRedundancyBlockGroups = lowRedundancyBlockGroups; + this.corruptBlockGroups = corruptBlockGroups; + this.missingBlockGroups = missingBlockGroups; + this.bytesInFutureBlockGroups = bytesInFutureBlockGroups; + this.pendingDeletionBlocks = pendingDeletionBlocks; + } + + public long getBytesInFutureBlockGroups() { + return bytesInFutureBlockGroups; + } + + public long getCorruptBlockGroups() { + return corruptBlockGroups; + } + + public long getLowRedundancyBlockGroups() { + return lowRedundancyBlockGroups; + } + + public long getMissingBlockGroups() { + return missingBlockGroups; + } + + public long getPendingDeletionBlocks() { + return pendingDeletionBlocks; + } + + @Override + public String toString() { + StringBuilder statsBuilder = new StringBuilder(); + statsBuilder.append("ECBlockGroupStats=[") + .append("LowRedundancyBlockGroups=").append( + getLowRedundancyBlockGroups()) + .append(", CorruptBlockGroups=").append(getCorruptBlockGroups()) + .append(", MissingBlockGroups=").append(getMissingBlockGroups()) + .append(", BytesInFutureBlockGroups=").append( + getBytesInFutureBlockGroups()) + .append(", PendingDeletionBlocks=").append( + getPendingDeletionBlocks()) + .append("]"); + return statsBuilder.toString(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupsStats.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupsStats.java deleted file mode 100644 index 80cf262d8af..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupsStats.java +++ /dev/null @@ -1,83 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs.protocol; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - -/** - * Get statistics pertaining to blocks of type {@link BlockType#STRIPED} - * in the filesystem. - *

- * @see ClientProtocol#getECBlockGroupsStats() - */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public final class ECBlockGroupsStats { - private final long lowRedundancyBlockGroupsStat; - private final long corruptBlockGroupsStat; - private final long missingBlockGroupsStat; - private final long bytesInFutureBlockGroupsStat; - private final long pendingDeletionBlockGroupsStat; - - public ECBlockGroupsStats(long lowRedundancyBlockGroupsStat, long - corruptBlockGroupsStat, long missingBlockGroupsStat, long - bytesInFutureBlockGroupsStat, long pendingDeletionBlockGroupsStat) { - this.lowRedundancyBlockGroupsStat = lowRedundancyBlockGroupsStat; - this.corruptBlockGroupsStat = corruptBlockGroupsStat; - this.missingBlockGroupsStat = missingBlockGroupsStat; - this.bytesInFutureBlockGroupsStat = bytesInFutureBlockGroupsStat; - this.pendingDeletionBlockGroupsStat = pendingDeletionBlockGroupsStat; - } - - public long getBytesInFutureBlockGroupsStat() { - return bytesInFutureBlockGroupsStat; - } - - public long getCorruptBlockGroupsStat() { - return corruptBlockGroupsStat; - } - - public long getLowRedundancyBlockGroupsStat() { - return lowRedundancyBlockGroupsStat; - } - - public long getMissingBlockGroupsStat() { - return missingBlockGroupsStat; - } - - public long getPendingDeletionBlockGroupsStat() { - return pendingDeletionBlockGroupsStat; - } - - @Override - public String toString() { - StringBuilder statsBuilder = new StringBuilder(); - statsBuilder.append("ECBlockGroupsStats=[") - .append("LowRedundancyBlockGroups=").append( - getLowRedundancyBlockGroupsStat()) - .append(", CorruptBlockGroups=").append(getCorruptBlockGroupsStat()) - .append(", MissingBlockGroups=").append(getMissingBlockGroupsStat()) - .append(", BytesInFutureBlockGroups=").append( - getBytesInFutureBlockGroupsStat()) - .append(", PendingDeletionBlockGroups=").append( - getPendingDeletionBlockGroupsStat()) - .append("]"); - return statsBuilder.toString(); - } -} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java index b82a860cf4a..193aae25a25 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java @@ -78,6 +78,17 @@ public LocatedBlocks getBlockLocations() { return locations; } + /** + * This function is used to transform the underlying HDFS LocatedBlocks to + * BlockLocations. + * + * The returned BlockLocation will have different formats for replicated + * and erasure coded file. + * Please refer to + * {@link org.apache.hadoop.fs.FileSystem#getFileBlockLocations + * (FileStatus, long, long)} + * for examples. 
+ */ public final LocatedFileStatus makeQualifiedLocated(URI defaultUri, Path path) { makeQualified(defaultUri, path); @@ -96,5 +107,4 @@ public int hashCode() { // satisfy findbugs return super.hashCode(); } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java new file mode 100644 index 00000000000..49aadedcdec --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocol; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Get statistics pertaining to blocks of type {@link BlockType#CONTIGUOUS} + * in the filesystem. + *

+ * @see ClientProtocol#getReplicatedBlockStats() + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class ReplicatedBlockStats { + private final long lowRedundancyBlocks; + private final long corruptBlocks; + private final long missingBlocks; + private final long missingReplicationOneBlocks; + private final long bytesInFutureBlocks; + private final long pendingDeletionBlocks; + + public ReplicatedBlockStats(long lowRedundancyBlocks, + long corruptBlocks, long missingBlocks, + long missingReplicationOneBlocks, long bytesInFutureBlocks, + long pendingDeletionBlocks) { + this.lowRedundancyBlocks = lowRedundancyBlocks; + this.corruptBlocks = corruptBlocks; + this.missingBlocks = missingBlocks; + this.missingReplicationOneBlocks = missingReplicationOneBlocks; + this.bytesInFutureBlocks = bytesInFutureBlocks; + this.pendingDeletionBlocks = pendingDeletionBlocks; + } + + public long getLowRedundancyBlocks() { + return lowRedundancyBlocks; + } + + public long getCorruptBlocks() { + return corruptBlocks; + } + + public long getMissingReplicaBlocks() { + return missingBlocks; + } + + public long getMissingReplicationOneBlocks() { + return missingReplicationOneBlocks; + } + + public long getBytesInFutureBlocks() { + return bytesInFutureBlocks; + } + + public long getPendingDeletionBlocks() { + return pendingDeletionBlocks; + } + + @Override + public String toString() { + StringBuilder statsBuilder = new StringBuilder(); + statsBuilder.append("ReplicatedBlockStats=[") + .append("LowRedundancyBlocks=").append(getLowRedundancyBlocks()) + .append(", CorruptBlocks=").append(getCorruptBlocks()) + .append(", MissingReplicaBlocks=").append(getMissingReplicaBlocks()) + .append(", MissingReplicationOneBlocks=").append( + getMissingReplicationOneBlocks()) + .append(", BytesInFutureBlocks=").append(getBytesInFutureBlocks()) + .append(", PendingDeletionBlocks=").append( + getPendingDeletionBlocks()) + .append("]"); + return statsBuilder.toString(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index ec7d93f689c..209eee7b501 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -61,7 +61,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; -import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats; +import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats; import org.apache.hadoop.hdfs.protocol.EncryptionZone; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -73,7 +73,7 @@ import org.apache.hadoop.hdfs.protocol.LastBlockWithStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; -import org.apache.hadoop.hdfs.protocol.BlocksStats; +import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; import org.apache.hadoop.hdfs.protocol.OpenFileEntry; import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus; import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; @@ -120,8 +120,8 @@ import 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileInfoResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoResponseProto; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupsStatsRequestProto; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsBlocksStatsRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupStatsRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatusRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetResponseProto; @@ -246,13 +246,13 @@ public class ClientNamenodeProtocolTranslatorPB implements private final static GetFsStatusRequestProto VOID_GET_FSSTATUS_REQUEST = GetFsStatusRequestProto.newBuilder().build(); - private final static GetFsBlocksStatsRequestProto - VOID_GET_FS_REPLICABLOCKS_STATS_REQUEST = - GetFsBlocksStatsRequestProto.newBuilder().build(); + private final static GetFsReplicatedBlockStatsRequestProto + VOID_GET_FS_REPLICATED_BLOCK_STATS_REQUEST = + GetFsReplicatedBlockStatsRequestProto.newBuilder().build(); - private final static GetFsECBlockGroupsStatsRequestProto - VOID_GET_FS_ECBLOCKGROUPS_STATS_REQUEST = - GetFsECBlockGroupsStatsRequestProto.newBuilder().build(); + private final static GetFsECBlockGroupStatsRequestProto + VOID_GET_FS_ECBLOCKGROUP_STATS_REQUEST = + GetFsECBlockGroupStatsRequestProto.newBuilder().build(); private final static RollEditsRequestProto VOID_ROLLEDITS_REQUEST = RollEditsRequestProto.getDefaultInstance(); @@ -695,20 +695,20 @@ public long[] getStats() throws IOException { } @Override - public BlocksStats getBlocksStats() throws IOException { + public ReplicatedBlockStats getReplicatedBlockStats() throws IOException { try { - return PBHelperClient.convert(rpcProxy.getFsBlocksStats(null, - VOID_GET_FS_REPLICABLOCKS_STATS_REQUEST)); + return PBHelperClient.convert(rpcProxy.getFsReplicatedBlockStats(null, + VOID_GET_FS_REPLICATED_BLOCK_STATS_REQUEST)); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } } @Override - public ECBlockGroupsStats getECBlockGroupsStats() throws IOException { + public ECBlockGroupStats getECBlockGroupStats() throws IOException { try { - return PBHelperClient.convert(rpcProxy.getFsECBlockGroupsStats(null, - VOID_GET_FS_ECBLOCKGROUPS_STATS_REQUEST)); + return PBHelperClient.convert(rpcProxy.getFsECBlockGroupStats(null, + VOID_GET_FS_ECBLOCKGROUP_STATS_REQUEST)); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java index 63a4271ea00..6dd65b1d230 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java @@ -76,7 +76,7 @@ import 
org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates; import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; -import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats; +import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats; import org.apache.hadoop.hdfs.protocol.EncryptionZone; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState; @@ -92,7 +92,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock; -import org.apache.hadoop.hdfs.protocol.BlocksStats; +import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; import org.apache.hadoop.hdfs.protocol.OpenFileEntry; import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; import org.apache.hadoop.hdfs.protocol.RollingUpgradeStatus; @@ -122,8 +122,8 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.DatanodeReportTypeProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.DatanodeStorageReportProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetEditsFromTxidResponseProto; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupsStatsResponseProto; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsBlocksStatsResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupStatsResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatsResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.OpenFilesBatchResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.RollingUpgradeActionProto; @@ -1810,17 +1810,17 @@ public static long[] convert(GetFsStatsResponseProto res) { return result; } - public static BlocksStats convert( - GetFsBlocksStatsResponseProto res) { - return new BlocksStats(res.getLowRedundancy(), + public static ReplicatedBlockStats convert( + GetFsReplicatedBlockStatsResponseProto res) { + return new ReplicatedBlockStats(res.getLowRedundancy(), res.getCorruptBlocks(), res.getMissingBlocks(), res.getMissingReplOneBlocks(), res.getBlocksInFuture(), res.getPendingDeletionBlocks()); } - public static ECBlockGroupsStats convert( - GetFsECBlockGroupsStatsResponseProto res) { - return new ECBlockGroupsStats(res.getLowRedundancy(), + public static ECBlockGroupStats convert( + GetFsECBlockGroupStatsResponseProto res) { + return new ECBlockGroupStats(res.getLowRedundancy(), res.getCorruptBlocks(), res.getMissingBlocks(), res.getBlocksInFuture(), res.getPendingDeletionBlocks()); } @@ -2236,37 +2236,37 @@ public static GetFsStatsResponseProto convert(long[] fsStats) { return result.build(); } - public static GetFsBlocksStatsResponseProto convert( - BlocksStats blocksStats) { - GetFsBlocksStatsResponseProto.Builder result = - GetFsBlocksStatsResponseProto.newBuilder(); + public static GetFsReplicatedBlockStatsResponseProto convert( + ReplicatedBlockStats replicatedBlockStats) { + GetFsReplicatedBlockStatsResponseProto.Builder result = + GetFsReplicatedBlockStatsResponseProto.newBuilder(); result.setLowRedundancy( - blocksStats.getLowRedundancyBlocksStat()); + 
replicatedBlockStats.getLowRedundancyBlocks()); result.setCorruptBlocks( - blocksStats.getCorruptBlocksStat()); + replicatedBlockStats.getCorruptBlocks()); result.setMissingBlocks( - blocksStats.getMissingReplicaBlocksStat()); + replicatedBlockStats.getMissingReplicaBlocks()); result.setMissingReplOneBlocks( - blocksStats.getMissingReplicationOneBlocksStat()); + replicatedBlockStats.getMissingReplicationOneBlocks()); result.setBlocksInFuture( - blocksStats.getBytesInFutureBlocksStat()); + replicatedBlockStats.getBytesInFutureBlocks()); result.setPendingDeletionBlocks( - blocksStats.getPendingDeletionBlocksStat()); + replicatedBlockStats.getPendingDeletionBlocks()); return result.build(); } - public static GetFsECBlockGroupsStatsResponseProto convert( - ECBlockGroupsStats ecBlockGroupsStats) { - GetFsECBlockGroupsStatsResponseProto.Builder result = - GetFsECBlockGroupsStatsResponseProto.newBuilder(); + public static GetFsECBlockGroupStatsResponseProto convert( + ECBlockGroupStats ecBlockGroupStats) { + GetFsECBlockGroupStatsResponseProto.Builder result = + GetFsECBlockGroupStatsResponseProto.newBuilder(); result.setLowRedundancy( - ecBlockGroupsStats.getLowRedundancyBlockGroupsStat()); - result.setCorruptBlocks(ecBlockGroupsStats.getCorruptBlockGroupsStat()); - result.setMissingBlocks(ecBlockGroupsStats.getMissingBlockGroupsStat()); + ecBlockGroupStats.getLowRedundancyBlockGroups()); + result.setCorruptBlocks(ecBlockGroupStats.getCorruptBlockGroups()); + result.setMissingBlocks(ecBlockGroupStats.getMissingBlockGroups()); result.setBlocksInFuture( - ecBlockGroupsStats.getBytesInFutureBlockGroupsStat()); + ecBlockGroupStats.getBytesInFutureBlockGroups()); result.setPendingDeletionBlocks( - ecBlockGroupsStats.getPendingDeletionBlockGroupsStat()); + ecBlockGroupStats.getPendingDeletionBlocks()); return result.build(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java index 7ec5fe5c15f..dcd73bfc7eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java @@ -22,7 +22,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.ContentSummary.Builder; import org.apache.hadoop.fs.FileChecksum; @@ -645,56 +644,4 @@ private static StorageType[] toStorageTypes(List list) { } } - static BlockLocation[] toBlockLocationArray(Map json) - throws IOException{ - final Map rootmap = - (Map)json.get(BlockLocation.class.getSimpleName() + "s"); - final List array = JsonUtilClient.getList(rootmap, - BlockLocation.class.getSimpleName()); - - Preconditions.checkNotNull(array); - final BlockLocation[] locations = new BlockLocation[array.size()]; - int i = 0; - for (Object object : array) { - final Map m = (Map) object; - locations[i++] = JsonUtilClient.toBlockLocation(m); - } - return locations; - } - - /** Convert a Json map to BlockLocation. **/ - static BlockLocation toBlockLocation(Map m) - throws IOException{ - if(m == null) { - return null; - } - - long length = ((Number) m.get("length")).longValue(); - long offset = ((Number) m.get("offset")).longValue(); - boolean corrupt = Boolean. 
- getBoolean(m.get("corrupt").toString()); - String[] storageIds = toStringArray(getList(m, "storageIds")); - String[] cachedHosts = toStringArray(getList(m, "cachedHosts")); - String[] hosts = toStringArray(getList(m, "hosts")); - String[] names = toStringArray(getList(m, "names")); - String[] topologyPaths = toStringArray(getList(m, "topologyPaths")); - StorageType[] storageTypes = toStorageTypeArray( - getList(m, "storageTypes")); - return new BlockLocation(names, hosts, cachedHosts, - topologyPaths, storageIds, storageTypes, - offset, length, corrupt); - } - - static String[] toStringArray(List list) { - if (list == null) { - return null; - } else { - final String[] array = new String[list.size()]; - int i = 0; - for (Object object : list) { - array[i++] = object.toString(); - } - return array; - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 1159e50de7f..ee8d5c1c325 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -1616,68 +1616,14 @@ public BlockLocation[] getFileBlockLocations(final Path p, final long offset, final long length) throws IOException { statistics.incrementReadOps(1); storageStatistics.incrementOpCounter(OpType.GET_FILE_BLOCK_LOCATIONS); - BlockLocation[] locations = null; - try { - locations = getFileBlockLocations( - GetOpParam.Op.GETFILEBLOCKLOCATIONS, - p, offset, length); - } catch (RemoteException e) { - // See the error message from ExceptionHandle - if(e.getMessage() != null && - e.getMessage().contains( - "Invalid value for webhdfs parameter") && - e.getMessage().contains( - GetOpParam.Op.GETFILEBLOCKLOCATIONS.toString())) { - // Old webhdfs server doesn't support GETFILEBLOCKLOCATIONS - // operation, fall back to query again using old API - // GET_BLOCK_LOCATIONS. - LOG.info("Invalid webhdfs operation parameter " - + GetOpParam.Op.GETFILEBLOCKLOCATIONS + ". Fallback to use " - + GetOpParam.Op.GET_BLOCK_LOCATIONS + " instead."); - locations = getFileBlockLocations( - GetOpParam.Op.GET_BLOCK_LOCATIONS, - p, offset, length); - } - } - return locations; - } - /** - * Get file block locations implementation. Provide a operation - * parameter to determine how to get block locations from a webhdfs - * server. Older server only supports GET_BLOCK_LOCATIONS but - * not GETFILEBLOCKLOCATIONS. 
- * - * @param path path to the file - * @param offset start offset in the given file - * @param length of the file to get locations for - * @param operation - * Valid operation is either - * {@link org.apache.hadoop.hdfs.web.resources.GetOpParam.Op - * #GET_BLOCK_LOCATIONS} or - * {@link org.apache.hadoop.hdfs.web.resources.GetOpParam.Op - * #GET_BLOCK_LOCATIONS} - * @throws IOException - * Http connection error, decoding error or given - * operation is not valid - */ - @VisibleForTesting - protected BlockLocation[] getFileBlockLocations( - GetOpParam.Op operation, final Path path, - final long offset, final long length) throws IOException { - return new FsPathResponseRunner(operation, path, + final HttpOpParam.Op op = GetOpParam.Op.GET_BLOCK_LOCATIONS; + return new FsPathResponseRunner(op, p, new OffsetParam(offset), new LengthParam(length)) { @Override BlockLocation[] decodeResponse(Map json) throws IOException { - switch(operation) { - case GETFILEBLOCKLOCATIONS: - return JsonUtilClient.toBlockLocationArray(json); - case GET_BLOCK_LOCATIONS: - return DFSUtilClient.locatedBlocks2Locations( - JsonUtilClient.toLocatedBlocks(json)); - default : - throw new IOException("Unknown operation " + operation.name()); - } + return DFSUtilClient.locatedBlocks2Locations( + JsonUtilClient.toLocatedBlocks(json)); } }.run(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java index d32af330ae7..6c2c674ad15 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java @@ -33,18 +33,8 @@ public enum Op implements HttpOpParam.Op { GETHOMEDIRECTORY(false, HttpURLConnection.HTTP_OK), GETDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK, true), - /** - * GET_BLOCK_LOCATIONS is a private/stable API op. It returns a - * {@link org.apache.hadoop.hdfs.protocol.LocatedBlocks} - * json object. - */ + /** GET_BLOCK_LOCATIONS is a private unstable op. */ GET_BLOCK_LOCATIONS(false, HttpURLConnection.HTTP_OK), - /** - * GETFILEBLOCKLOCATIONS is the public op that complies with - * {@link org.apache.hadoop.fs.FileSystem#getFileBlockLocations} - * interface. 
- */ - GETFILEBLOCKLOCATIONS(false, HttpURLConnection.HTTP_OK), GETACLSTATUS(false, HttpURLConnection.HTTP_OK), GETXATTRS(false, HttpURLConnection.HTTP_OK), GETTRASHROOT(false, HttpURLConnection.HTTP_OK), diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto index 3f108fa718b..6db6ad0804c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto @@ -327,10 +327,10 @@ message GetFsStatsResponseProto { optional uint64 pending_deletion_blocks = 9; } -message GetFsBlocksStatsRequestProto { // no input paramters +message GetFsReplicatedBlockStatsRequestProto { // no input paramters } -message GetFsBlocksStatsResponseProto { +message GetFsReplicatedBlockStatsResponseProto { required uint64 low_redundancy = 1; required uint64 corrupt_blocks = 2; required uint64 missing_blocks = 3; @@ -339,10 +339,10 @@ message GetFsBlocksStatsResponseProto { required uint64 pending_deletion_blocks = 6; } -message GetFsECBlockGroupsStatsRequestProto { // no input paramters +message GetFsECBlockGroupStatsRequestProto { // no input paramters } -message GetFsECBlockGroupsStatsResponseProto { +message GetFsECBlockGroupStatsResponseProto { required uint64 low_redundancy = 1; required uint64 corrupt_blocks = 2; required uint64 missing_blocks = 3; @@ -831,10 +831,10 @@ service ClientNamenodeProtocol { rpc recoverLease(RecoverLeaseRequestProto) returns(RecoverLeaseResponseProto); rpc getFsStats(GetFsStatusRequestProto) returns(GetFsStatsResponseProto); - rpc getFsBlocksStats(GetFsBlocksStatsRequestProto) - returns (GetFsBlocksStatsResponseProto); - rpc getFsECBlockGroupsStats(GetFsECBlockGroupsStatsRequestProto) - returns (GetFsECBlockGroupsStatsResponseProto); + rpc getFsReplicatedBlockStats(GetFsReplicatedBlockStatsRequestProto) + returns (GetFsReplicatedBlockStatsResponseProto); + rpc getFsECBlockGroupStats(GetFsECBlockGroupStatsRequestProto) + returns (GetFsECBlockGroupStatsResponseProto); rpc getDatanodeReport(GetDatanodeReportRequestProto) returns(GetDatanodeReportResponseProto); rpc getDatanodeStorageReport(GetDatanodeStorageReportRequestProto) diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java index 1059a02f127..b5880e95bf1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java @@ -23,12 +23,9 @@ import java.util.EnumSet; import java.util.List; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.type.MapType; import com.google.common.base.Charsets; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.DelegationTokenRenewer; import org.apache.hadoop.fs.FSDataInputStream; @@ -122,8 +119,6 @@ public class HttpFSFileSystem extends FileSystem public static final String NEW_LENGTH_PARAM = "newlength"; public static final String START_AFTER_PARAM = "startAfter"; public static final String POLICY_NAME_PARAM = 
"storagepolicy"; - public static final String OFFSET_PARAM = "offset"; - public static final String LENGTH_PARAM = "length"; public static final String SNAPSHOT_NAME_PARAM = "snapshotname"; public static final String OLD_SNAPSHOT_NAME_PARAM = "oldsnapshotname"; @@ -210,7 +205,6 @@ public static FILE_TYPE getType(FileStatus fileStatus) { public static final String STORAGE_POLICIES_JSON = "BlockStoragePolicies"; public static final String STORAGE_POLICY_JSON = "BlockStoragePolicy"; - public static final String BLOCK_LOCATIONS_JSON = "BlockLocations"; public static final int HTTP_TEMPORARY_REDIRECT = 307; @@ -1359,42 +1353,6 @@ public BlockStoragePolicy getStoragePolicy(Path src) throws IOException { return createStoragePolicy((JSONObject) json.get(STORAGE_POLICY_JSON)); } - @Override - public BlockLocation[] getFileBlockLocations(FileStatus file, long start, - long len) throws IOException { - Map params = new HashMap(); - params.put(OP_PARAM, Operation.GETFILEBLOCKLOCATIONS.toString()); - params.put(OFFSET_PARAM, Long.toString(start)); - params.put(LENGTH_PARAM, Long.toString(len)); - HttpURLConnection conn = - getConnection(Operation.GETFILEBLOCKLOCATIONS.getMethod(), params, - file.getPath(), true); - HttpExceptionUtils.validateResponse(conn, HttpURLConnection.HTTP_OK); - JSONObject json = (JSONObject) HttpFSUtils.jsonParse(conn); - return toBlockLocations(json); - } - - private BlockLocation[] toBlockLocations(JSONObject json) - throws IOException { - ObjectMapper mapper = new ObjectMapper(); - MapType subType = mapper.getTypeFactory().constructMapType( - Map.class, - String.class, - BlockLocation[].class); - MapType rootType = mapper.getTypeFactory().constructMapType( - Map.class, - mapper.constructType(String.class), - mapper.constructType(subType)); - - Map> jsonMap = mapper - .readValue(json.toJSONString(), rootType); - Map locationMap = jsonMap - .get(BLOCK_LOCATIONS_JSON); - BlockLocation[] locationArray = locationMap.get( - BlockLocation.class.getSimpleName()); - return locationArray; - } - private BlockStoragePolicy createStoragePolicy(JSONObject policyJson) throws IOException { byte id = ((Number) policyJson.get("id")).byteValue(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java index 4b5918abf50..a08bc54b0b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/FSOperations.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.http.server; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.BlockStoragePolicySpi; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileChecksum; @@ -36,7 +35,6 @@ import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; -import org.apache.hadoop.hdfs.web.JsonUtil; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.lib.service.FileSystemAccess; import org.apache.hadoop.util.StringUtils; @@ -1458,41 +1456,6 @@ public Void execute(FileSystem fs) throws IOException { } } - /** - * Executor that performs a getFileBlockLocations FileSystemAccess - * file system operation. 
- */ - @InterfaceAudience.Private - @SuppressWarnings("rawtypes") - public static class FSFileBlockLocations implements - FileSystemAccess.FileSystemExecutor { - private Path path; - private long offsetValue; - private long lengthValue; - - /** - * Creates a file-block-locations executor. - * - * @param path the path to retrieve the location - * @param offsetValue offset into the given file - * @param lengthValue length for which to get locations for - */ - public FSFileBlockLocations(String path, long offsetValue, - long lengthValue) { - this.path = new Path(path); - this.offsetValue = offsetValue; - this.lengthValue = lengthValue; - } - - @Override - public Map execute(FileSystem fs) throws IOException { - BlockLocation[] locations = - fs.getFileBlockLocations(this.path, this.offsetValue, - this.lengthValue); - return JsonUtil.toJsonMap(locations); - } - } - /** * Executor that performs a createSnapshot FileSystemAccess operation. */ @@ -1596,5 +1559,4 @@ public Void execute(FileSystem fs) throws IOException { return null; } } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSParametersProvider.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSParametersProvider.java index 5f265c09852..3e6a5adfe49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSParametersProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSParametersProvider.java @@ -58,8 +58,7 @@ public class HttpFSParametersProvider extends ParametersProvider { PARAMS_DEF.put(Operation.GETHOMEDIRECTORY, new Class[]{}); PARAMS_DEF.put(Operation.GETCONTENTSUMMARY, new Class[]{}); PARAMS_DEF.put(Operation.GETFILECHECKSUM, new Class[]{}); - PARAMS_DEF.put(Operation.GETFILEBLOCKLOCATIONS, - new Class[] {OffsetParam.class, LenParam.class}); + PARAMS_DEF.put(Operation.GETFILEBLOCKLOCATIONS, new Class[]{}); PARAMS_DEF.put(Operation.GETACLSTATUS, new Class[]{}); PARAMS_DEF.put(Operation.GETTRASHROOT, new Class[]{}); PARAMS_DEF.put(Operation.INSTRUMENTATION, new Class[]{}); diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java index 03ccb4caa04..bcc11820f25 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java @@ -51,7 +51,6 @@ import org.apache.hadoop.fs.http.server.HttpFSParametersProvider.XAttrNameParam; import org.apache.hadoop.fs.http.server.HttpFSParametersProvider.XAttrSetFlagParam; import org.apache.hadoop.fs.http.server.HttpFSParametersProvider.XAttrValueParam; -import org.apache.hadoop.hdfs.web.JsonUtil; import org.apache.hadoop.http.JettyUtils; import org.apache.hadoop.lib.service.FileSystemAccess; import org.apache.hadoop.lib.service.FileSystemAccessException; @@ -299,25 +298,7 @@ public InputStream run() throws Exception { break; } case GETFILEBLOCKLOCATIONS: { - long offset = 0; - // In case length is not given, reset to max long - // in order to retrieve all file block locations - long len = Long.MAX_VALUE; - Long offsetParam = params.get(OffsetParam.NAME, OffsetParam.class); - Long lenParam = params.get(LenParam.NAME, LenParam.class); - AUDIT_LOG.info("[{}] offset [{}] len 
[{}]", - new Object[] {path, offsetParam, lenParam}); - if (offsetParam != null && offsetParam.longValue() > 0) { - offset = offsetParam.longValue(); - } - if (lenParam != null && lenParam.longValue() > 0) { - len = lenParam.longValue(); - } - FSOperations.FSFileBlockLocations command = - new FSOperations.FSFileBlockLocations(path, offset, len); - @SuppressWarnings("rawtypes") Map locations = fsExecute(user, command); - final String json = JsonUtil.toJsonString("BlockLocations", locations); - response = Response.ok(json).type(MediaType.APPLICATION_JSON).build(); + response = Response.status(Response.Status.BAD_REQUEST).build(); break; } case GETACLSTATUS: { diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java index 2cd89344aa8..a6dce4da108 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/client/BaseTestHttpFSWith.java @@ -20,7 +20,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockStoragePolicySpi; -import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileChecksum; @@ -1041,7 +1040,7 @@ protected enum Operation { WORKING_DIRECTORY, MKDIRS, SET_TIMES, SET_PERMISSION, SET_OWNER, SET_REPLICATION, CHECKSUM, CONTENT_SUMMARY, FILEACLS, DIRACLS, SET_XATTR, GET_XATTRS, REMOVE_XATTR, LIST_XATTRS, ENCRYPTION, LIST_STATUS_BATCH, - GETTRASHROOT, STORAGEPOLICY, ERASURE_CODING, GETFILEBLOCKLOCATIONS, + GETTRASHROOT, STORAGEPOLICY, ERASURE_CODING, CREATE_SNAPSHOT, RENAME_SNAPSHOT, DELETE_SNAPSHOT } @@ -1131,9 +1130,6 @@ private void operation(Operation op) throws Exception { case ERASURE_CODING: testErasureCoding(); break; - case GETFILEBLOCKLOCATIONS: - testGetFileBlockLocations(); - break; case CREATE_SNAPSHOT: testCreateSnapshot(); break; @@ -1189,88 +1185,6 @@ public Void run() throws Exception { }); } - private void testGetFileBlockLocations() throws Exception { - BlockLocation[] locations1, locations2, locations11, locations21 = null; - Path testFile = null; - - // Test single block file block locations. - try (FileSystem fs = FileSystem.get(getProxiedFSConf())) { - testFile = new Path(getProxiedFSTestDir(), "singleBlock.txt"); - DFSTestUtil.createFile(fs, testFile, (long) 1, (short) 1, 0L); - locations1 = fs.getFileBlockLocations(testFile, 0, 1); - Assert.assertNotNull(locations1); - } - - try (FileSystem fs = getHttpFSFileSystem()) { - locations2 = fs.getFileBlockLocations(testFile, 0, 1); - Assert.assertNotNull(locations2); - } - - verifyBlockLocations(locations1, locations2); - - // Test multi-block single replica file block locations. 
- try (FileSystem fs = FileSystem.get(getProxiedFSConf())) { - testFile = new Path(getProxiedFSTestDir(), "multipleBlocks.txt"); - DFSTestUtil.createFile(fs, testFile, 512, (short) 2048, - (long) 512, (short) 1, 0L); - locations1 = fs.getFileBlockLocations(testFile, 0, 1024); - locations11 = fs.getFileBlockLocations(testFile, 1024, 2048); - Assert.assertNotNull(locations1); - Assert.assertNotNull(locations11); - } - - try (FileSystem fs = getHttpFSFileSystem()) { - locations2 = fs.getFileBlockLocations(testFile, 0, 1024); - locations21 = fs.getFileBlockLocations(testFile, 1024, 2048); - Assert.assertNotNull(locations2); - Assert.assertNotNull(locations21); - } - - verifyBlockLocations(locations1, locations2); - verifyBlockLocations(locations11, locations21); - - // Test multi-block multi-replica file block locations. - try (FileSystem fs = FileSystem.get(getProxiedFSConf())) { - testFile = new Path(getProxiedFSTestDir(), "multipleBlocks.txt"); - DFSTestUtil.createFile(fs, testFile, 512, (short) 2048, - (long) 512, (short) 3, 0L); - locations1 = fs.getFileBlockLocations(testFile, 0, 2048); - Assert.assertNotNull(locations1); - } - - try (FileSystem fs = getHttpFSFileSystem()) { - locations2 = fs.getFileBlockLocations(testFile, 0, 2048); - Assert.assertNotNull(locations2); - } - - verifyBlockLocations(locations1, locations2); - } - - private void verifyBlockLocations(BlockLocation[] locations1, - BlockLocation[] locations2) throws IOException { - Assert.assertEquals(locations1.length, locations2.length); - for (int i = 0; i < locations1.length; i++) { - BlockLocation location1 = locations1[i]; - BlockLocation location2 = locations2[i]; - - Assert.assertEquals(location1.isCorrupt(), location2.isCorrupt()); - Assert.assertEquals(location1.getOffset(), location2.getOffset()); - Assert.assertEquals(location1.getLength(), location2.getLength()); - - Arrays.sort(location1.getHosts()); - Arrays.sort(location2.getHosts()); - Arrays.sort(location1.getNames()); - Arrays.sort(location2.getNames()); - Arrays.sort(location1.getTopologyPaths()); - Arrays.sort(location2.getTopologyPaths()); - - Assert.assertArrayEquals(location1.getHosts(), location2.getHosts()); - Assert.assertArrayEquals(location1.getNames(), location2.getNames()); - Assert.assertArrayEquals(location1.getTopologyPaths(), - location2.getTopologyPaths()); - } - } - private void testCreateSnapshot(String snapshotName) throws Exception { if (!this.isLocalFS()) { Path snapshottablePath = new Path("/tmp/tmp-snap-test"); @@ -1363,5 +1277,4 @@ private void testDeleteSnapshot() throws Exception { fs.delete(snapshottablePath, true); } } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/test/TestHdfsHelper.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/test/TestHdfsHelper.java index 251193a9581..62f197246f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/test/TestHdfsHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/test/TestHdfsHelper.java @@ -167,8 +167,6 @@ private static synchronized MiniDFSCluster startMiniHdfs(Configuration conf) thr new Path(helper.getTestRootDir(), "test.jks").toUri(); conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_PROVIDER_PATH, jceksPath); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ERASURE_CODING_POLICY.getName()); MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); int totalDataNodes = ERASURE_CODING_POLICY.getNumDataUnits() + 
ERASURE_CODING_POLICY.getNumParityUnits(); @@ -178,6 +176,7 @@ private static synchronized MiniDFSCluster startMiniHdfs(Configuration conf) thr DFSTestUtil.createKey(testkey, miniHdfs, conf); DistributedFileSystem fileSystem = miniHdfs.getFileSystem(); + fileSystem.enableErasureCodingPolicy(ERASURE_CODING_POLICY.getName()); fileSystem.getClient().setKeyProvider(miniHdfs.getNameNode() .getNamesystem().getProvider()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index d06e378c713..91f3bb9d07f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -563,22 +563,18 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DATANODE_DISK_CHECK_TIMEOUT_DEFAULT = "10m"; - public static final String DFS_NAMENODE_EC_POLICIES_ENABLED_KEY = "dfs.namenode.ec.policies.enabled"; - public static final String DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT = ""; public static final String DFS_NAMENODE_EC_POLICIES_MAX_CELLSIZE_KEY = "dfs.namenode.ec.policies.max.cellsize"; public static final int DFS_NAMENODE_EC_POLICIES_MAX_CELLSIZE_DEFAULT = 4 * 1024 * 1024; public static final String DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY = "dfs.namenode.ec.system.default.policy"; public static final String DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT = "RS-6-3-1024k"; - public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_KEY = "dfs.datanode.ec.reconstruction.stripedread.threads"; - public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_DEFAULT = 20; public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_KEY = "dfs.datanode.ec.reconstruction.stripedread.buffer.size"; public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_BUFFER_SIZE_DEFAULT = 64 * 1024; public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_TIMEOUT_MILLIS_KEY = "dfs.datanode.ec.reconstruction.stripedread.timeout.millis"; public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_TIMEOUT_MILLIS_DEFAULT = 5000; //5s - public static final String DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_KEY = "dfs.datanode.ec.reconstruction.stripedblock.threads.size"; - public static final int DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_DEFAULT = 8; + public static final String DFS_DN_EC_RECONSTRUCTION_THREADS_KEY = "dfs.datanode.ec.reconstruction.threads"; + public static final int DFS_DN_EC_RECONSTRUCTION_THREADS_DEFAULT = 8; public static final String DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index 44d5216a267..a79e75f39d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -124,12 +124,12 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileInfoResponseProto; import 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupStatsRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupStatsResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsReplicatedBlockStatsResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatsResponseProto; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsBlocksStatsResponseProto; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupsStatsResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatusRequestProto; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsBlocksStatsRequestProto; -import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsECBlockGroupsStatsRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetListingRequestProto; @@ -763,22 +763,22 @@ public GetFsStatsResponseProto getFsStats(RpcController controller, } @Override - public GetFsBlocksStatsResponseProto getFsBlocksStats( - RpcController controller, GetFsBlocksStatsRequestProto request) + public GetFsReplicatedBlockStatsResponseProto getFsReplicatedBlockStats( + RpcController controller, GetFsReplicatedBlockStatsRequestProto request) throws ServiceException { try { - return PBHelperClient.convert(server.getBlocksStats()); + return PBHelperClient.convert(server.getReplicatedBlockStats()); } catch (IOException e) { throw new ServiceException(e); } } @Override - public GetFsECBlockGroupsStatsResponseProto getFsECBlockGroupsStats( - RpcController controller, GetFsECBlockGroupsStatsRequestProto request) + public GetFsECBlockGroupStatsResponseProto getFsECBlockGroupStats( + RpcController controller, GetFsECBlockGroupStatsRequestProto request) throws ServiceException { try { - return PBHelperClient.convert(server.getECBlockGroupsStats()); + return PBHelperClient.convert(server.getECBlockGroupStats()); } catch (IOException e) { throw new ServiceException(e); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java index dc3231827e2..dee74e6fcfd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java @@ -24,7 +24,7 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.StopWatch; -import org.apache.hadoop.util.Time; +import org.apache.hadoop.util.Timer; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; @@ -35,6 +35,7 @@ import com.google.protobuf.Message; import com.google.protobuf.TextFormat; + /** * Represents a set of calls 
for which a quorum of results is needed. * @param a key used to identify each of the outgoing calls @@ -60,11 +61,12 @@ class QuorumCall { * fraction of the configured timeout for any call. */ private static final float WAIT_PROGRESS_WARN_THRESHOLD = 0.7f; - private final StopWatch quorumStopWatch = new StopWatch(); + private final StopWatch quorumStopWatch; + private final Timer timer; static QuorumCall create( - Map> calls) { - final QuorumCall qr = new QuorumCall(); + Map> calls, Timer timer) { + final QuorumCall qr = new QuorumCall(timer); for (final Entry> e : calls.entrySet()) { Preconditions.checkArgument(e.getValue() != null, "null future for key: " + e.getKey()); @@ -82,18 +84,53 @@ public void onSuccess(RESULT res) { } return qr; } - - private QuorumCall() { - // Only instantiated from factory method above + + static QuorumCall create( + Map> calls) { + return create(calls, new Timer()); } + /** + * Not intended for outside use. + */ + private QuorumCall() { + this(new Timer()); + } + + private QuorumCall(Timer timer) { + // Only instantiated from factory method above + this.timer = timer; + this.quorumStopWatch = new StopWatch(timer); + } + + /** + * Used in conjunction with {@link #getQuorumTimeoutIncreaseMillis(long, int)} + * to check for pauses. + */ private void restartQuorumStopWatch() { quorumStopWatch.reset().start(); } - private boolean shouldIncreaseQuorumTimeout(long offset, int millis) { + /** + * Check for a pause (e.g. GC) since the last time + * {@link #restartQuorumStopWatch()} was called. If detected, return the + * length of the pause; else, -1. + * @param offset Offset the elapsed time by this amount; use if some amount + * of pause was expected + * @param millis Total length of timeout in milliseconds + * @return Length of pause, if detected, else -1 + */ + private long getQuorumTimeoutIncreaseMillis(long offset, int millis) { long elapsed = quorumStopWatch.now(TimeUnit.MILLISECONDS); - return elapsed + offset > (millis * WAIT_PROGRESS_INFO_THRESHOLD); + long pauseTime = elapsed + offset; + if (pauseTime > (millis * WAIT_PROGRESS_INFO_THRESHOLD)) { + QuorumJournalManager.LOG.info("Pause detected while waiting for " + + "QuorumCall response; increasing timeout threshold by pause time " + + "of " + pauseTime + " ms."); + return pauseTime; + } else { + return -1; + } } @@ -119,7 +156,7 @@ public synchronized void waitFor( int minResponses, int minSuccesses, int maxExceptions, int millis, String operationName) throws InterruptedException, TimeoutException { - long st = Time.monotonicNow(); + long st = timer.monotonicNow(); long nextLogTime = st + (long)(millis * WAIT_PROGRESS_INFO_THRESHOLD); long et = st + millis; while (true) { @@ -128,7 +165,7 @@ public synchronized void waitFor( if (minResponses > 0 && countResponses() >= minResponses) return; if (minSuccesses > 0 && countSuccesses() >= minSuccesses) return; if (maxExceptions >= 0 && countExceptions() > maxExceptions) return; - long now = Time.monotonicNow(); + long now = timer.monotonicNow(); if (now > nextLogTime) { long waited = now - st; @@ -154,8 +191,9 @@ public synchronized void waitFor( long rem = et - now; if (rem <= 0) { // Increase timeout if a full GC occurred after restarting stopWatch - if (shouldIncreaseQuorumTimeout(0, millis)) { - et = et + millis; + long timeoutIncrease = getQuorumTimeoutIncreaseMillis(0, millis); + if (timeoutIncrease > 0) { + et += timeoutIncrease; } else { throw new TimeoutException(); } @@ -165,8 +203,9 @@ public synchronized void waitFor( rem = Math.max(rem, 1); 
wait(rem); // Increase timeout if a full GC occurred after restarting stopWatch - if (shouldIncreaseQuorumTimeout(-rem, millis)) { - et = et + millis; + long timeoutIncrease = getQuorumTimeoutIncreaseMillis(-rem, millis); + if (timeoutIncrease > 0) { + et += timeoutIncrease; } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java index 6056e34d8f3..f56848cfdee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java @@ -157,27 +157,36 @@ public int run(String[] args) throws Exception { */ public void start() throws IOException { Preconditions.checkState(!isStarted(), "JN already running"); - - validateAndCreateJournalDir(localDir); - - DefaultMetricsSystem.initialize("JournalNode"); - JvmMetrics.create("JournalNode", - conf.get(DFSConfigKeys.DFS_METRICS_SESSION_ID_KEY), - DefaultMetricsSystem.instance()); - InetSocketAddress socAddr = JournalNodeRpcServer.getAddress(conf); - SecurityUtil.login(conf, DFSConfigKeys.DFS_JOURNALNODE_KEYTAB_FILE_KEY, - DFSConfigKeys.DFS_JOURNALNODE_KERBEROS_PRINCIPAL_KEY, socAddr.getHostName()); - - registerJNMXBean(); - - httpServer = new JournalNodeHttpServer(conf, this); - httpServer.start(); + try { - httpServerURI = httpServer.getServerURI().toString(); + validateAndCreateJournalDir(localDir); - rpcServer = new JournalNodeRpcServer(conf, this); - rpcServer.start(); + DefaultMetricsSystem.initialize("JournalNode"); + JvmMetrics.create("JournalNode", + conf.get(DFSConfigKeys.DFS_METRICS_SESSION_ID_KEY), + DefaultMetricsSystem.instance()); + + InetSocketAddress socAddr = JournalNodeRpcServer.getAddress(conf); + SecurityUtil.login(conf, DFSConfigKeys.DFS_JOURNALNODE_KEYTAB_FILE_KEY, + DFSConfigKeys.DFS_JOURNALNODE_KERBEROS_PRINCIPAL_KEY, + socAddr.getHostName()); + + registerJNMXBean(); + + httpServer = new JournalNodeHttpServer(conf, this); + httpServer.start(); + + httpServerURI = httpServer.getServerURI().toString(); + + rpcServer = new JournalNodeRpcServer(conf, this); + rpcServer.start(); + } catch (IOException ioe) { + //Shutdown JournalNode of JournalNodeRpcServer fails to start + LOG.error("Failed to start JournalNode.", ioe); + this.stop(1); + throw ioe; + } } public boolean isStarted() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index e83cbc6ef4a..f33ec63cf58 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -233,47 +233,47 @@ public long getNumTimedOutPendingReconstructions() { /** Used by metrics. */ public long getLowRedundancyBlocks() { - return neededReconstruction.getLowRedundancyBlocksStat(); + return neededReconstruction.getLowRedundancyBlocks(); } /** Used by metrics. */ public long getCorruptBlocks() { - return corruptReplicas.getCorruptBlocksStat(); + return corruptReplicas.getCorruptBlocks(); } /** Used by metrics. 
*/ public long getMissingBlocks() { - return neededReconstruction.getCorruptBlocksStat(); + return neededReconstruction.getCorruptBlocks(); } /** Used by metrics. */ public long getMissingReplicationOneBlocks() { - return neededReconstruction.getCorruptReplicationOneBlocksStat(); + return neededReconstruction.getCorruptReplicationOneBlocks(); } /** Used by metrics. */ public long getPendingDeletionReplicatedBlocks() { - return invalidateBlocks.getBlocksStat(); + return invalidateBlocks.getBlocks(); } /** Used by metrics. */ public long getLowRedundancyECBlockGroups() { - return neededReconstruction.getLowRedundancyECBlockGroupsStat(); + return neededReconstruction.getLowRedundancyECBlockGroups(); } /** Used by metrics. */ public long getCorruptECBlockGroups() { - return corruptReplicas.getCorruptECBlockGroupsStat(); + return corruptReplicas.getCorruptECBlockGroups(); } /** Used by metrics. */ public long getMissingECBlockGroups() { - return neededReconstruction.getCorruptECBlockGroupsStat(); + return neededReconstruction.getCorruptECBlockGroups(); } /** Used by metrics. */ - public long getPendingDeletionECBlockGroups() { - return invalidateBlocks.getECBlockGroupsStat(); + public long getPendingDeletionECBlocks() { + return invalidateBlocks.getECBlocks(); } /** @@ -748,7 +748,7 @@ public void metaSave(PrintWriter out) { invalidateBlocks.dump(out); //Dump corrupt blocks and their storageIDs - Set corruptBlocks = corruptReplicas.getCorruptBlocks(); + Set corruptBlocks = corruptReplicas.getCorruptBlocksSet(); out.println("Corrupt Blocks:"); for(Block block : corruptBlocks) { Collection corruptNodes = @@ -2057,6 +2057,7 @@ public DatanodeStorageInfo[] chooseTarget4NewBlock(final String src, final List favoredNodes, final byte storagePolicyID, final BlockType blockType, + final ErasureCodingPolicy ecPolicy, final EnumSet flags) throws IOException { List favoredDatanodeDescriptors = getDatanodeDescriptors(favoredNodes); @@ -2067,14 +2068,23 @@ public DatanodeStorageInfo[] chooseTarget4NewBlock(final String src, final DatanodeStorageInfo[] targets = blockplacement.chooseTarget(src, numOfReplicas, client, excludedNodes, blocksize, favoredDatanodeDescriptors, storagePolicy, flags); - if (targets.length < minReplication) { - throw new IOException("File " + src + " could only be replicated to " - + targets.length + " nodes instead of minReplication (=" - + minReplication + "). There are " - + getDatanodeManager().getNetworkTopology().getNumOfLeaves() - + " datanode(s) running and " - + (excludedNodes == null? "no": excludedNodes.size()) - + " node(s) are excluded in this operation."); + + final String errorMessage = "File %s could only be written to %d of " + + "the %d %s. There are %d datanode(s) running and %s " + + "node(s) are excluded in this operation."; + if (blockType == BlockType.CONTIGUOUS && targets.length < minReplication) { + throw new IOException(String.format(errorMessage, src, + targets.length, minReplication, "minReplication nodes", + getDatanodeManager().getNetworkTopology().getNumOfLeaves(), + (excludedNodes == null? "no": excludedNodes.size()))); + } else if (blockType == BlockType.STRIPED && + targets.length < ecPolicy.getNumDataUnits()) { + throw new IOException( + String.format(errorMessage, src, targets.length, + ecPolicy.getNumDataUnits(), + String.format("required nodes for %s", ecPolicy.getName()), + getDatanodeManager().getNetworkTopology().getNumOfLeaves(), + (excludedNodes == null ? 
"no" : excludedNodes.size()))); } return targets; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java index d158b640142..7a576bb9b84 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java @@ -240,7 +240,7 @@ long[] getCorruptBlockIdsForTesting(BlockType blockType, * method to get the set of corrupt blocks in corruptReplicasMap. * @return Set of Block objects */ - Set getCorruptBlocks() { + Set getCorruptBlocksSet() { Set corruptBlocks = new HashSet(); corruptBlocks.addAll(corruptReplicasMap.keySet()); return corruptBlocks; @@ -267,11 +267,11 @@ String getCorruptReason(Block block, DatanodeDescriptor node) { } } - long getCorruptBlocksStat() { + long getCorruptBlocks() { return totalCorruptBlocks.longValue(); } - long getCorruptECBlockGroupsStat() { + long getCorruptECBlockGroups() { return totalCorruptECBlockGroups.longValue(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java index 7b6b8a924ca..75561caabac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java @@ -53,9 +53,9 @@ class InvalidateBlocks { private final Map> nodeToBlocks = new HashMap<>(); private final Map> - nodeToECBlockGroups = new HashMap<>(); + nodeToECBlocks = new HashMap<>(); private final LongAdder numBlocks = new LongAdder(); - private final LongAdder numECBlockGroups = new LongAdder(); + private final LongAdder numECBlocks = new LongAdder(); private final int blockInvalidateLimit; /** @@ -87,7 +87,7 @@ private void printBlockDeletionTime(final Logger log) { * @return The total number of blocks to be invalidated. */ long numBlocks() { - return getECBlockGroupsStat() + getBlocksStat(); + return getECBlocks() + getBlocks(); } /** @@ -95,7 +95,7 @@ long numBlocks() { * {@link org.apache.hadoop.hdfs.protocol.BlockType#CONTIGUOUS} * to be invalidated. */ - long getBlocksStat() { + long getBlocks() { return numBlocks.longValue(); } @@ -104,8 +104,8 @@ long getBlocksStat() { * {@link org.apache.hadoop.hdfs.protocol.BlockType#STRIPED} * to be invalidated. 
*/ - long getECBlockGroupsStat() { - return numECBlockGroups.longValue(); + long getECBlocks() { + return numECBlocks.longValue(); } private LightWeightHashSet getBlocksSet(final DatanodeInfo dn) { @@ -115,9 +115,9 @@ private LightWeightHashSet getBlocksSet(final DatanodeInfo dn) { return null; } - private LightWeightHashSet getECBlockGroupsSet(final DatanodeInfo dn) { - if (nodeToECBlockGroups.containsKey(dn)) { - return nodeToECBlockGroups.get(dn); + private LightWeightHashSet getECBlocksSet(final DatanodeInfo dn) { + if (nodeToECBlocks.containsKey(dn)) { + return nodeToECBlocks.get(dn); } return null; } @@ -125,7 +125,7 @@ private LightWeightHashSet getECBlockGroupsSet(final DatanodeInfo dn) { private LightWeightHashSet getBlocksSet(final DatanodeInfo dn, final Block block) { if (BlockIdManager.isStripedBlockID(block.getBlockId())) { - return getECBlockGroupsSet(dn); + return getECBlocksSet(dn); } else { return getBlocksSet(dn); } @@ -134,8 +134,8 @@ private LightWeightHashSet getBlocksSet(final DatanodeInfo dn, private void putBlocksSet(final DatanodeInfo dn, final Block block, final LightWeightHashSet set) { if (BlockIdManager.isStripedBlockID(block.getBlockId())) { - assert getECBlockGroupsSet(dn) == null; - nodeToECBlockGroups.put(dn, set); + assert getECBlocksSet(dn) == null; + nodeToECBlocks.put(dn, set); } else { assert getBlocksSet(dn) == null; nodeToBlocks.put(dn, set); @@ -144,7 +144,7 @@ private void putBlocksSet(final DatanodeInfo dn, final Block block, private long getBlockSetsSize(final DatanodeInfo dn) { LightWeightHashSet replicaBlocks = getBlocksSet(dn); - LightWeightHashSet stripedBlocks = getECBlockGroupsSet(dn); + LightWeightHashSet stripedBlocks = getECBlocksSet(dn); return ((replicaBlocks == null ? 0 : replicaBlocks.size()) + (stripedBlocks == null ? 
0 : stripedBlocks.size())); } @@ -179,7 +179,7 @@ synchronized void add(final Block block, final DatanodeInfo datanode, } if (set.add(block)) { if (BlockIdManager.isStripedBlockID(block.getBlockId())) { - numECBlockGroups.increment(); + numECBlocks.increment(); } else { numBlocks.increment(); } @@ -196,9 +196,9 @@ synchronized void remove(final DatanodeInfo dn) { if (replicaBlockSets != null) { numBlocks.add(replicaBlockSets.size() * -1); } - LightWeightHashSet blockGroupSets = nodeToECBlockGroups.remove(dn); - if (blockGroupSets != null) { - numECBlockGroups.add(blockGroupSets.size() * -1); + LightWeightHashSet ecBlocksSet = nodeToECBlocks.remove(dn); + if (ecBlocksSet != null) { + numECBlocks.add(ecBlocksSet.size() * -1); } } @@ -207,7 +207,7 @@ synchronized void remove(final DatanodeInfo dn, final Block block) { final LightWeightHashSet v = getBlocksSet(dn, block); if (v != null && v.remove(block)) { if (BlockIdManager.isStripedBlockID(block.getBlockId())) { - numECBlockGroups.decrement(); + numECBlocks.decrement(); } else { numBlocks.decrement(); } @@ -231,21 +231,21 @@ private void dumpBlockSet(final Map getDatanodes() { HashSet set = new HashSet<>(); set.addAll(nodeToBlocks.keySet()); - set.addAll(nodeToECBlockGroups.keySet()); + set.addAll(nodeToECBlocks.keySet()); return new ArrayList<>(set); } @@ -289,9 +289,9 @@ synchronized List invalidateWork(final DatanodeDescriptor dn) { remainingLimit = getBlocksToInvalidateByLimit(nodeToBlocks.get(dn), toInvalidate, numBlocks, remainingLimit); } - if ((remainingLimit > 0) && (nodeToECBlockGroups.get(dn) != null)) { - getBlocksToInvalidateByLimit(nodeToECBlockGroups.get(dn), - toInvalidate, numECBlockGroups, remainingLimit); + if ((remainingLimit > 0) && (nodeToECBlocks.get(dn) != null)) { + getBlocksToInvalidateByLimit(nodeToECBlocks.get(dn), + toInvalidate, numECBlocks, remainingLimit); } if (toInvalidate.size() > 0 && getBlockSetsSize(dn) == 0) { remove(dn); @@ -302,8 +302,8 @@ synchronized List invalidateWork(final DatanodeDescriptor dn) { synchronized void clear() { nodeToBlocks.clear(); - nodeToECBlockGroups.clear(); + nodeToECBlocks.clear(); numBlocks.reset(); - numECBlockGroups.reset(); + numECBlocks.reset(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java index af2cb7ef037..347d606a04e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java @@ -144,33 +144,33 @@ synchronized int getCorruptBlockSize() { /** Return the number of corrupt blocks with replication factor 1 */ long getCorruptReplicationOneBlockSize() { - return getCorruptReplicationOneBlocksStat(); + return getCorruptReplicationOneBlocks(); } /** * Return under replicated block count excluding corrupt replicas. 
*/ - long getLowRedundancyBlocksStat() { - return lowRedundancyBlocks.longValue() - getCorruptBlocksStat(); + long getLowRedundancyBlocks() { + return lowRedundancyBlocks.longValue() - getCorruptBlocks(); } - long getCorruptBlocksStat() { + long getCorruptBlocks() { return corruptBlocks.longValue(); } - long getCorruptReplicationOneBlocksStat() { + long getCorruptReplicationOneBlocks() { return corruptReplicationOneBlocks.longValue(); } /** * Return low redundancy striped blocks excluding corrupt blocks. */ - long getLowRedundancyECBlockGroupsStat() { + long getLowRedundancyECBlockGroups() { return lowRedundancyECBlockGroups.longValue() - - getCorruptECBlockGroupsStat(); + getCorruptECBlockGroups(); } - long getCorruptECBlockGroupsStat() { + long getCorruptECBlockGroups() { return corruptECBlockGroups.longValue(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 8264206764f..9096831a49f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -3054,8 +3054,16 @@ void transferReplicaForPipelineRecovery(final ExtendedBlock b, b.setNumBytes(visible); if (targets.length > 0) { - new Daemon(new DataTransfer(targets, targetStorageTypes, - targetStorageIds, b, stage, client)).start(); + Daemon daemon = new Daemon(threadGroup, + new DataTransfer(targets, targetStorageTypes, targetStorageIds, b, + stage, client)); + daemon.start(); + try { + daemon.join(); + } catch (InterruptedException e) { + throw new IOException( + "Pipeline recovery for " + b + " is interrupted.", e); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java index 72c224f2f77..07d213c55b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java @@ -53,19 +53,19 @@ public ErasureCodingWorker(Configuration conf, DataNode datanode) { this.datanode = datanode; this.conf = conf; - initializeStripedReadThreadPool(conf.getInt( - DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_KEY, - DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_READ_THREADS_DEFAULT)); + initializeStripedReadThreadPool(); initializeStripedBlkReconstructionThreadPool(conf.getInt( - DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_KEY, - DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_DEFAULT)); + DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_THREADS_KEY, + DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_THREADS_DEFAULT)); } - private void initializeStripedReadThreadPool(int num) { - LOG.debug("Using striped reads; pool threads={}", num); + private void initializeStripedReadThreadPool() { + LOG.debug("Using striped reads"); - stripedReadPool = new ThreadPoolExecutor(1, num, 60, TimeUnit.SECONDS, - new SynchronousQueue(), + // Essentially, this is a cachedThreadPool. 
+ stripedReadPool = new ThreadPoolExecutor(0, Integer.MAX_VALUE, + 60, TimeUnit.SECONDS, + new SynchronousQueue<>(), new Daemon.DaemonFactory() { private final AtomicInteger threadIndex = new AtomicInteger(0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java index bac013aea29..34e58ae47b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; +import org.apache.hadoop.util.Time; /** * StripedBlockReconstructor reconstruct one or more missed striped block in @@ -83,18 +84,28 @@ void reconstruct() throws IOException { final int toReconstructLen = (int) Math.min(getStripedReader().getBufferSize(), remaining); + long start = Time.monotonicNow(); // step1: read from minimum source DNs required for reconstruction. // The returned success list is the source DNs we do real read from getStripedReader().readMinimumSources(toReconstructLen); + long readEnd = Time.monotonicNow(); // step2: decode to reconstruct targets reconstructTargets(toReconstructLen); + long decodeEnd = Time.monotonicNow(); // step3: transfer data if (stripedWriter.transferData2Targets() == 0) { String error = "Transfer failed for all targets."; throw new IOException(error); } + long writeEnd = Time.monotonicNow(); + + // Only the succeed reconstructions are recorded. 
+ final DataNodeMetrics metrics = getDatanode().getMetrics(); + metrics.incrECReconstructionReadTime(readEnd - start); + metrics.incrECReconstructionDecodingTime(decodeEnd - readEnd); + metrics.incrECReconstructionWriteTime(writeEnd - decodeEnd); updatePositionInBlock(toReconstructLen); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java index a8a691980bb..58a2f65f4e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java @@ -151,6 +151,12 @@ public class DataNodeMetrics { MutableCounterLong ecReconstructionBytesWritten; @Metric("Bytes remote read by erasure coding worker") MutableCounterLong ecReconstructionRemoteBytesRead; + @Metric("Milliseconds spent on read by erasure coding worker") + private MutableCounterLong ecReconstructionReadTimeMillis; + @Metric("Milliseconds spent on decoding by erasure coding worker") + private MutableCounterLong ecReconstructionDecodingTimeMillis; + @Metric("Milliseconds spent on write by erasure coding worker") + private MutableCounterLong ecReconstructionWriteTimeMillis; final MetricsRegistry registry = new MetricsRegistry("datanode"); final String name; @@ -503,4 +509,16 @@ public void incrECReconstructionRemoteBytesRead(long bytes) { public void incrECReconstructionBytesWritten(long bytes) { ecReconstructionBytesWritten.incr(bytes); } + + public void incrECReconstructionReadTime(long millis) { + ecReconstructionReadTimeMillis.incr(millis); + } + + public void incrECReconstructionWriteTime(long millis) { + ecReconstructionWriteTimeMillis.incr(millis); + } + + public void incrECReconstructionDecodingTime(long millis) { + ecReconstructionDecodingTimeMillis.incr(millis); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java index 4c757097301..90699b43a49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ErasureCodingPolicyManager.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.lang.ArrayUtils; +import com.google.common.base.Preconditions; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -101,15 +101,10 @@ public static ErasureCodingPolicyManager getInstance() { private ErasureCodingPolicyManager() {} public void init(Configuration conf) { - // Populate the list of enabled policies from configuration - final String[] enablePolicyNames = conf.getTrimmedStrings( - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT); + // Load erasure coding default policy final String defaultPolicyName = conf.getTrimmed( DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY, DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT); - final String[] policyNames 
= - (String[]) ArrayUtils.add(enablePolicyNames, defaultPolicyName); this.policiesByName = new TreeMap<>(); this.policiesByID = new TreeMap<>(); this.enabledPoliciesByName = new TreeMap<>(); @@ -129,11 +124,8 @@ public void init(Configuration conf) { policiesByID.put(policy.getId(), policy); } - for (String policyName : policyNames) { - if (policyName.trim().isEmpty()) { - continue; - } - ErasureCodingPolicy ecPolicy = policiesByName.get(policyName); + if (!defaultPolicyName.trim().isEmpty()) { + ErasureCodingPolicy ecPolicy = policiesByName.get(defaultPolicyName); if (ecPolicy == null) { String names = policiesByName.values() .stream().map(ErasureCodingPolicy::getName) @@ -141,8 +133,8 @@ public void init(Configuration conf) { String msg = String.format("EC policy '%s' specified at %s is not a " + "valid policy. Please choose from list of available " + "policies: [%s]", - policyName, - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, + defaultPolicyName, + DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY, names); throw new HadoopIllegalArgumentException(msg); } @@ -250,14 +242,15 @@ public synchronized ErasureCodingPolicy addPolicy( policy.getSchema(), policy.getCellSize()); for (ErasureCodingPolicy p : getPolicies()) { if (p.getName().equals(assignedNewName)) { - throw new HadoopIllegalArgumentException("The policy name " + - assignedNewName + " already exists"); + LOG.info("The policy name " + assignedNewName + " already exists"); + return p; } if (p.getSchema().equals(policy.getSchema()) && p.getCellSize() == policy.getCellSize()) { - throw new HadoopIllegalArgumentException("A policy with same schema " + LOG.info("A policy with same schema " + policy.getSchema().toString() + " and cell size " + p.getCellSize() + " already exists"); + return p; } } policy.setName(assignedNewName); @@ -298,6 +291,11 @@ public synchronized void removePolicy(String name) { } ecPolicy.setState(ErasureCodingPolicyState.REMOVED); LOG.info("Remove erasure coding policy " + name); + + /* + * TODO HDFS-12405 postpone the delete removed policy to Namenode restart + * time. + * */ } @VisibleForTesting @@ -347,4 +345,36 @@ public synchronized void enablePolicy(String name) { enabledPoliciesByName.values().toArray(new ErasureCodingPolicy[0]); LOG.info("Enable the erasure coding policy " + name); } + + /** + * Load an erasure coding policy into erasure coding manager. + */ + private void loadPolicy(ErasureCodingPolicy policy) { + if (!CodecUtil.hasCodec(policy.getCodecName()) || + policy.getCellSize() > maxCellSize) { + // If policy is not supported in current system, set the policy state to + // DISABLED; + policy.setState(ErasureCodingPolicyState.DISABLED); + } + + this.policiesByName.put(policy.getName(), policy); + this.policiesByID.put(policy.getId(), policy); + if (policy.isEnabled()) { + enablePolicy(policy.getName()); + } + } + + /** + * Reload erasure coding policies from fsImage. 
+ * + * @param ecPolicies contains ErasureCodingPolicy list + * + */ + public synchronized void loadPolicies(List ecPolicies) { + Preconditions.checkNotNull(ecPolicies); + for (ErasureCodingPolicy p : ecPolicies) { + loadPolicy(p); + } + allPolicies = policiesByName.values().toArray(new ErasureCodingPolicy[0]); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java index ee2b0f430bf..e284b1581db 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java @@ -32,7 +32,6 @@ import org.apache.hadoop.crypto.CryptoProtocolVersion; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; -import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.CryptoExtension; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; import org.apache.hadoop.fs.FileEncryptionInfo; import org.apache.hadoop.fs.FileStatus; @@ -698,9 +697,7 @@ static String getCurrentKeyVersion(final FSDirectory dir, final String zone) // drain the local cache of the key provider. // Do not invalidateCache on the server, since that's the responsibility // when rolling the key version. - if (dir.getProvider() instanceof CryptoExtension) { - ((CryptoExtension) dir.getProvider()).drain(keyName); - } + dir.getProvider().drain(keyName); final EncryptedKeyVersion edek; try { edek = dir.getProvider().generateEncryptedKey(keyName); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java index deb03afd851..181b1478502 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirErasureCodingOp.java @@ -24,7 +24,6 @@ import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.fs.XAttrSetFlag; import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.XAttrHelper; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp; @@ -80,11 +79,10 @@ static ErasureCodingPolicy getErasureCodingPolicyByName( .collect(Collectors.joining(", ")); final String message = String.format("Policy '%s' does not match any " + "enabled erasure" + - " coding policies: [%s]. The set of enabled erasure coding " + - "policies can be configured at '%s'.", + " coding policies: [%s]. An erasure coding policy can be" + + " enabled by enableErasureCodingPolicy API.", ecPolicyName, - sysPolicies, - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY + sysPolicies ); throw new HadoopIllegalArgumentException(message); } @@ -210,10 +208,22 @@ static FileStatus unsetErasureCodingPolicy(final FSNamesystem fsn, return fsd.getAuditFileInfo(iip); } - static ErasureCodingPolicy addErasureCodePolicy(final FSNamesystem fsn, - ErasureCodingPolicy policy) { + /** + * Add an erasure coding policy. 
+ * + * @param fsn namespace + * @param policy the new policy to be added into system + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding + * @throws IOException + */ + static ErasureCodingPolicy addErasureCodingPolicy(final FSNamesystem fsn, + ErasureCodingPolicy policy, final boolean logRetryCache) { Preconditions.checkNotNull(policy); - return fsn.getErasureCodingPolicyManager().addPolicy(policy); + ErasureCodingPolicy retPolicy = + fsn.getErasureCodingPolicyManager().addPolicy(policy); + fsn.getEditLog().logAddErasureCodingPolicy(policy, logRetryCache); + return retPolicy; } /** @@ -221,24 +231,47 @@ static ErasureCodingPolicy addErasureCodePolicy(final FSNamesystem fsn, * * @param fsn namespace * @param ecPolicyName the name of the policy to be removed + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding * @throws IOException */ - static void removeErasureCodePolicy(final FSNamesystem fsn, - String ecPolicyName) throws IOException { + static void removeErasureCodingPolicy(final FSNamesystem fsn, + String ecPolicyName, final boolean logRetryCache) throws IOException { Preconditions.checkNotNull(ecPolicyName); fsn.getErasureCodingPolicyManager().removePolicy(ecPolicyName); + fsn.getEditLog().logRemoveErasureCodingPolicy(ecPolicyName, logRetryCache); } - static void enableErasureCodePolicy(final FSNamesystem fsn, - String ecPolicyName) throws IOException { + /** + * Enable an erasure coding policy. + * + * @param fsn namespace + * @param ecPolicyName the name of the policy to be enabled + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding + * @throws IOException + */ + static void enableErasureCodingPolicy(final FSNamesystem fsn, + String ecPolicyName, final boolean logRetryCache) throws IOException { Preconditions.checkNotNull(ecPolicyName); fsn.getErasureCodingPolicyManager().enablePolicy(ecPolicyName); + fsn.getEditLog().logEnableErasureCodingPolicy(ecPolicyName, logRetryCache); } - static void disableErasureCodePolicy(final FSNamesystem fsn, - String ecPolicyName) throws IOException { + /** + * Disable an erasure coding policy. 
+ * + * @param fsn namespace + * @param ecPolicyName the name of the policy to be disabled + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding + * @throws IOException + */ + static void disableErasureCodingPolicy(final FSNamesystem fsn, + String ecPolicyName, final boolean logRetryCache) throws IOException { Preconditions.checkNotNull(ecPolicyName); fsn.getErasureCodingPolicyManager().disablePolicy(ecPolicyName); + fsn.getEditLog().logDisableErasureCodingPolicy(ecPolicyName, logRetryCache); } private static List removeErasureCodingPolicyXAttr( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java index 012e916f913..b2022126693 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java @@ -201,7 +201,7 @@ static ValidateAddBlockResult validateAddBlock( } storagePolicyID = pendingFile.getStoragePolicyID(); return new ValidateAddBlockResult(blockSize, numTargets, storagePolicyID, - clientMachine, blockType); + clientMachine, blockType, ecPolicy); } static LocatedBlock makeLocatedBlock(FSNamesystem fsn, BlockInfo blk, @@ -286,7 +286,7 @@ static DatanodeStorageInfo[] chooseTargetForNewBlock( return bm.chooseTarget4NewBlock(src, r.numTargets, clientNode, excludedNodesSet, r.blockSize, favoredNodesList, r.storagePolicyID, - r.blockType, flags); + r.blockType, r.ecPolicy, flags); } /** @@ -831,20 +831,28 @@ private static class FileState { } static class ValidateAddBlockResult { - final long blockSize; - final int numTargets; - final byte storagePolicyID; - final String clientMachine; - final BlockType blockType; + private final long blockSize; + private final int numTargets; + private final byte storagePolicyID; + private final String clientMachine; + private final BlockType blockType; + private final ErasureCodingPolicy ecPolicy; ValidateAddBlockResult( long blockSize, int numTargets, byte storagePolicyID, - String clientMachine, BlockType blockType) { + String clientMachine, BlockType blockType, + ErasureCodingPolicy ecPolicy) { this.blockSize = blockSize; this.numTargets = numTargets; this.storagePolicyID = storagePolicyID; this.clientMachine = clientMachine; this.blockType = blockType; + this.ecPolicy = ecPolicy; + + if (blockType == BlockType.STRIPED) { + Preconditions.checkArgument(ecPolicy != null, + "ecPolicy is not specified for striped block"); + } } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index d3f4447c2c4..a8f5bfaa32e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import 
org.apache.hadoop.hdfs.server.common.HdfsServerConstants; @@ -97,6 +98,10 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TruncateOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddErasureCodingPolicyOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.EnableErasureCodingPolicyOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisableErasureCodingPolicyOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveErasureCodingPolicyOp; import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; @@ -1228,6 +1233,38 @@ void logRemoveXAttrs(String src, List xAttrs, boolean toLogRpcIds) { logEdit(op); } + void logAddErasureCodingPolicy(ErasureCodingPolicy ecPolicy, + boolean toLogRpcIds) { + AddErasureCodingPolicyOp op = + AddErasureCodingPolicyOp.getInstance(cache.get()); + op.setErasureCodingPolicy(ecPolicy); + logRpcIds(op, toLogRpcIds); + logEdit(op); + } + + void logEnableErasureCodingPolicy(String ecPolicyName, boolean toLogRpcIds) { + EnableErasureCodingPolicyOp op = + EnableErasureCodingPolicyOp.getInstance(cache.get()); + op.setErasureCodingPolicy(ecPolicyName); + logRpcIds(op, toLogRpcIds); + logEdit(op); + } + + void logDisableErasureCodingPolicy(String ecPolicyName, boolean toLogRpcIds) { + DisableErasureCodingPolicyOp op = + DisableErasureCodingPolicyOp.getInstance(cache.get()); + op.setErasureCodingPolicy(ecPolicyName); + logRpcIds(op, toLogRpcIds); + logEdit(op); + } + + void logRemoveErasureCodingPolicy(String ecPolicyName, boolean toLogRpcIds) { + RemoveErasureCodingPolicyOp op = + RemoveErasureCodingPolicyOp.getInstance(cache.get()); + op.setErasureCodingPolicy(ecPolicyName); + logRpcIds(op, toLogRpcIds); + logEdit(op); + } /** * Get all the journals this edit log is currently operating on. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index bc62a7ef66c..a21b8ea914a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -96,6 +96,14 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TruncateOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp + .AddErasureCodingPolicyOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp + .RemoveErasureCodingPolicyOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp + .EnableErasureCodingPolicyOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp + .DisableErasureCodingPolicyOp; import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; @@ -958,6 +966,41 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, setStoragePolicyOp.policyId); break; } + case OP_ADD_ERASURE_CODING_POLICY: + AddErasureCodingPolicyOp addOp = (AddErasureCodingPolicyOp) op; + fsNamesys.getErasureCodingPolicyManager().addPolicy( + addOp.getEcPolicy()); + + if (toAddRetryCache) { + fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, + addOp.getEcPolicy()); + } + break; + case OP_ENABLE_ERASURE_CODING_POLICY: + EnableErasureCodingPolicyOp enableOp = (EnableErasureCodingPolicyOp) op; + fsNamesys.getErasureCodingPolicyManager().enablePolicy( + enableOp.getEcPolicy()); + if (toAddRetryCache) { + fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); + } + break; + case OP_DISABLE_ERASURE_CODING_POLICY: + DisableErasureCodingPolicyOp disableOp = + (DisableErasureCodingPolicyOp) op; + fsNamesys.getErasureCodingPolicyManager().disablePolicy( + disableOp.getEcPolicy()); + if (toAddRetryCache) { + fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); + } + break; + case OP_REMOVE_ERASURE_CODING_POLICY: + RemoveErasureCodingPolicyOp removeOp = (RemoveErasureCodingPolicyOp) op; + fsNamesys.getErasureCodingPolicyManager().removePolicy( + removeOp.getEcPolicy()); + if (toAddRetryCache) { + fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); + } + break; default: throw new IOException("Invalid operation read " + op.opCode); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index f93e867f8c1..2dc9d33c519 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD; +import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_ERASURE_CODING_POLICY; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_APPEND; import static 
org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_BLOCK; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ADD_CACHE_DIRECTIVE; @@ -31,7 +32,9 @@ import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_CREATE_SNAPSHOT; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_DELETE; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_DELETE_SNAPSHOT; +import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_DISABLE_ERASURE_CODING_POLICY; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_DISALLOW_SNAPSHOT; +import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_ENABLE_ERASURE_CODING_POLICY; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_END_LOG_SEGMENT; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_GET_DELEGATION_TOKEN; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_INVALID; @@ -41,6 +44,7 @@ import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REASSIGN_LEASE; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REMOVE_CACHE_DIRECTIVE; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REMOVE_CACHE_POOL; +import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REMOVE_ERASURE_CODING_POLICY; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_REMOVE_XATTR; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_RENAME; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_RENAME_OLD; @@ -75,7 +79,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.EnumMap; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.zip.CheckedInputStream; import java.util.zip.Checksum; @@ -100,6 +106,7 @@ import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; @@ -119,6 +126,7 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableFactories; import org.apache.hadoop.io.WritableFactory; +import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.ipc.ClientId; import org.apache.hadoop.ipc.RpcConstants; import org.apache.hadoop.security.token.delegation.DelegationKey; @@ -4339,6 +4347,323 @@ public void readFields(DataInput in) throws IOException { this.len = in.readLong(); } } + + /** + * Operation corresponding to add an erasure coding policy. 
+ */ + static class AddErasureCodingPolicyOp extends FSEditLogOp { + private ErasureCodingPolicy ecPolicy; + + AddErasureCodingPolicyOp() { + super(OP_ADD_ERASURE_CODING_POLICY); + } + + static AddErasureCodingPolicyOp getInstance(OpInstanceCache cache) { + return (AddErasureCodingPolicyOp) cache + .get(OP_ADD_ERASURE_CODING_POLICY); + } + + @Override + void resetSubFields() { + this.ecPolicy = null; + } + + public ErasureCodingPolicy getEcPolicy() { + return this.ecPolicy; + } + + public AddErasureCodingPolicyOp setErasureCodingPolicy( + ErasureCodingPolicy policy) { + Preconditions.checkNotNull(policy.getName()); + Preconditions.checkNotNull(policy.getSchema()); + Preconditions.checkArgument(policy.getCellSize() > 0); + this.ecPolicy = policy; + return this; + } + + @Override + void readFields(DataInputStream in, int logVersion) throws IOException { + this.ecPolicy = FSImageSerialization.readErasureCodingPolicy(in); + readRpcIds(in, logVersion); + } + + @Override + public void writeFields(DataOutputStream out) throws IOException { + Preconditions.checkNotNull(ecPolicy); + FSImageSerialization.writeErasureCodingPolicy(out, ecPolicy); + writeRpcIds(rpcClientId, rpcCallId, out); + } + + @Override + protected void toXml(ContentHandler contentHandler) throws SAXException { + Preconditions.checkNotNull(ecPolicy); + XMLUtils.addSaxString(contentHandler, "CODEC", ecPolicy.getCodecName()); + XMLUtils.addSaxString(contentHandler, "DATAUNITS", + Integer.toString(ecPolicy.getNumDataUnits())); + XMLUtils.addSaxString(contentHandler, "PARITYUNITS", + Integer.toString(ecPolicy.getNumParityUnits())); + XMLUtils.addSaxString(contentHandler, "CELLSIZE", + Integer.toString(ecPolicy.getCellSize())); + + Map extraOptions = ecPolicy.getSchema().getExtraOptions(); + if (extraOptions == null || extraOptions.isEmpty()) { + XMLUtils.addSaxString(contentHandler, "EXTRAOPTIONS", + Integer.toString(0)); + appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); + return; + } + + XMLUtils.addSaxString(contentHandler, "EXTRAOPTIONS", + Integer.toString(extraOptions.size())); + + for (Map.Entry entry : extraOptions.entrySet()) { + contentHandler.startElement("", "", "EXTRAOPTION", + new AttributesImpl()); + XMLUtils.addSaxString(contentHandler, "KEY", entry.getKey()); + XMLUtils.addSaxString(contentHandler, "VALUE", entry.getValue()); + contentHandler.endElement("", "", "EXTRAOPTION"); + } + appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); + } + + @Override + void fromXml(Stanza st) throws InvalidXmlException { + final String codecName = st.getValue("CODEC"); + final int dataUnits = Integer.parseInt(st.getValue("DATAUNITS")); + final int parityUnits = Integer.parseInt(st.getValue("PARITYUNITS")); + final int cellSize = Integer.parseInt(st.getValue("CELLSIZE")); + final int extraOptionNum = Integer.parseInt(st.getValue("EXTRAOPTIONS")); + + ECSchema schema; + if (extraOptionNum == 0) { + schema = new ECSchema(codecName, dataUnits, parityUnits, null); + } else { + Map extraOptions = new HashMap(); + List stanzas = st.getChildren("EXTRAOPTION"); + for (Stanza a: stanzas) { + extraOptions.put(a.getValue("KEY"), a.getValue("VALUE")); + } + schema = new ECSchema(codecName, dataUnits, parityUnits, extraOptions); + } + this.ecPolicy = new ErasureCodingPolicy(schema, cellSize); + readRpcIdsFromXml(st); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("AddErasureCodingPolicy ["); + builder.append(ecPolicy.toString()); + + appendRpcIdsToString(builder, 
rpcClientId, rpcCallId); + builder.append("]"); + return builder.toString(); + } + } + + /** + * Operation corresponding to enable an erasure coding policy. + */ + static class EnableErasureCodingPolicyOp extends FSEditLogOp { + private String ecPolicyName; + + EnableErasureCodingPolicyOp() { + super(OP_ENABLE_ERASURE_CODING_POLICY); + } + + static EnableErasureCodingPolicyOp getInstance(OpInstanceCache cache) { + return (EnableErasureCodingPolicyOp) cache + .get(OP_ENABLE_ERASURE_CODING_POLICY); + } + + @Override + void resetSubFields() { + this.ecPolicyName = null; + } + + public String getEcPolicy() { + return this.ecPolicyName; + } + + public EnableErasureCodingPolicyOp setErasureCodingPolicy( + String policyName) { + Preconditions.checkNotNull(policyName); + this.ecPolicyName = policyName; + return this; + } + + @Override + void readFields(DataInputStream in, int logVersion) throws IOException { + this.ecPolicyName = FSImageSerialization.readString(in); + readRpcIds(in, logVersion); + } + + @Override + public void writeFields(DataOutputStream out) throws IOException { + Preconditions.checkNotNull(ecPolicyName); + FSImageSerialization.writeString(ecPolicyName, out); + writeRpcIds(rpcClientId, rpcCallId, out); + } + + @Override + protected void toXml(ContentHandler contentHandler) throws SAXException { + Preconditions.checkNotNull(ecPolicyName); + XMLUtils.addSaxString(contentHandler, "POLICYNAME", this.ecPolicyName); + appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); + } + + @Override + void fromXml(Stanza st) throws InvalidXmlException { + this.ecPolicyName = st.getValue("POLICYNAME"); + readRpcIdsFromXml(st); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("EnableErasureCodingPolicy ["); + builder.append(ecPolicyName); + + appendRpcIdsToString(builder, rpcClientId, rpcCallId); + builder.append("]"); + return builder.toString(); + } + } + + /** + * Operation corresponding to disable an erasure coding policy. 
+ */ + static class DisableErasureCodingPolicyOp extends FSEditLogOp { + private String ecPolicyName; + + DisableErasureCodingPolicyOp() { + super(OP_DISABLE_ERASURE_CODING_POLICY); + } + + static DisableErasureCodingPolicyOp getInstance(OpInstanceCache cache) { + return (DisableErasureCodingPolicyOp) cache + .get(OP_DISABLE_ERASURE_CODING_POLICY); + } + + @Override + void resetSubFields() { + this.ecPolicyName = null; + } + + public String getEcPolicy() { + return this.ecPolicyName; + } + + public DisableErasureCodingPolicyOp setErasureCodingPolicy( + String policyName) { + Preconditions.checkNotNull(policyName); + this.ecPolicyName = policyName; + return this; + } + + @Override + void readFields(DataInputStream in, int logVersion) throws IOException { + this.ecPolicyName = FSImageSerialization.readString(in); + readRpcIds(in, logVersion); + } + + @Override + public void writeFields(DataOutputStream out) throws IOException { + FSImageSerialization.writeString(ecPolicyName, out); + writeRpcIds(rpcClientId, rpcCallId, out); + } + + @Override + protected void toXml(ContentHandler contentHandler) throws SAXException { + XMLUtils.addSaxString(contentHandler, "POLICYNAME", this.ecPolicyName); + appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); + } + + @Override + void fromXml(Stanza st) throws InvalidXmlException { + this.ecPolicyName = st.getValue("POLICYNAME"); + readRpcIdsFromXml(st); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("DisableErasureCodingPolicy ["); + builder.append(ecPolicyName); + + appendRpcIdsToString(builder, rpcClientId, rpcCallId); + builder.append("]"); + return builder.toString(); + } + } + + /** + * Operation corresponding to remove an erasure coding policy. 
+ */ + static class RemoveErasureCodingPolicyOp extends FSEditLogOp { + private String ecPolicyName; + + RemoveErasureCodingPolicyOp() { + super(OP_REMOVE_ERASURE_CODING_POLICY); + } + + static RemoveErasureCodingPolicyOp getInstance(OpInstanceCache cache) { + return (RemoveErasureCodingPolicyOp) cache + .get(OP_REMOVE_ERASURE_CODING_POLICY); + } + + @Override + void resetSubFields() { + this.ecPolicyName = null; + } + + public String getEcPolicy() { + return this.ecPolicyName; + } + + public RemoveErasureCodingPolicyOp setErasureCodingPolicy( + String policyName) { + Preconditions.checkNotNull(policyName); + this.ecPolicyName = policyName; + return this; + } + + @Override + void readFields(DataInputStream in, int logVersion) throws IOException { + this.ecPolicyName = FSImageSerialization.readString(in); + readRpcIds(in, logVersion); + } + + @Override + public void writeFields(DataOutputStream out) throws IOException { + FSImageSerialization.writeString(ecPolicyName, out); + writeRpcIds(rpcClientId, rpcCallId, out); + } + + @Override + protected void toXml(ContentHandler contentHandler) throws SAXException { + XMLUtils.addSaxString(contentHandler, "POLICYNAME", this.ecPolicyName); + appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); + } + + @Override + void fromXml(Stanza st) throws InvalidXmlException { + this.ecPolicyName = st.getValue("POLICYNAME"); + readRpcIdsFromXml(st); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("RemoveErasureCodingPolicy ["); + builder.append(ecPolicyName); + + appendRpcIdsToString(builder, rpcClientId, rpcCallId); + builder.append("]"); + return builder.toString(); + } + } + /** * Operation corresponding to upgrade */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java index 3f8febac35a..ce42e3faffe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java @@ -80,6 +80,11 @@ public enum FSEditLogOpCodes { OP_TRUNCATE ((byte) 46, TruncateOp.class), OP_APPEND ((byte) 47, AppendOp.class), OP_SET_QUOTA_BY_STORAGETYPE ((byte) 48, SetQuotaByStorageTypeOp.class), + OP_ADD_ERASURE_CODING_POLICY ((byte) 49, AddErasureCodingPolicyOp.class), + OP_ENABLE_ERASURE_CODING_POLICY((byte) 50, EnableErasureCodingPolicyOp.class), + OP_DISABLE_ERASURE_CODING_POLICY((byte) 51, + DisableErasureCodingPolicyOp.class), + OP_REMOVE_ERASURE_CODING_POLICY((byte) 52, RemoveErasureCodingPolicyOp.class), // Note that the current range of the valid OP code is 0~127 OP_INVALID ((byte) -1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index 9f8be89d64a..5e60038ce7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -334,10 +334,10 @@ private INodeFile loadINodeFile(INodeSection.INode n) { boolean isStriped = f.hasErasureCodingPolicyID(); assert ((!isStriped) || (isStriped && !f.hasReplication())); 
Short replication = (!isStriped ? (short) f.getReplication() : null); + Byte ecPolicyID = (isStriped ? + (byte) f.getErasureCodingPolicyID() : null); ErasureCodingPolicy ecPolicy = isStriped ? - fsn.getErasureCodingPolicyManager().getByID( - (byte) f.getErasureCodingPolicyID()) : null; - Byte ecPolicyID = (isStriped ? ecPolicy.getId() : null); + fsn.getErasureCodingPolicyManager().getByID(ecPolicyID) : null; BlockInfo[] blocks = new BlockInfo[bp.size()]; for (int i = 0; i < bp.size(); ++i) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java index 22331fe0b33..ad8cdfcb138 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java @@ -36,10 +36,13 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; import org.apache.hadoop.io.compress.CompressionOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,6 +50,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ErasureCodingPolicyProto; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.blockmanagement.BlockIdManager; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; @@ -55,6 +59,7 @@ import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.ErasureCodingSection; import org.apache.hadoop.hdfs.server.namenode.snapshot.FSImageFormatPBSnapshot; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; @@ -287,6 +292,12 @@ public int compare(FileSummary.Section s1, FileSummary.Section s2) { prog.endStep(Phase.LOADING_FSIMAGE, step); } break; + case ERASURE_CODING: + Step step = new Step(StepType.ERASURE_CODING_POLICIES); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + loadErasureCodingSection(in); + prog.endStep(Phase.LOADING_FSIMAGE, step); + break; default: LOG.warn("Unrecognized section {}", n); break; @@ -366,6 +377,17 @@ private void loadCacheManagerSection(InputStream in, StartupProgress prog, new CacheManager.PersistState(s, pools, directives)); } + private void loadErasureCodingSection(InputStream in) + throws IOException { + ErasureCodingSection s = ErasureCodingSection.parseDelimitedFrom(in); + List ecPolicies = Lists + .newArrayListWithCapacity(s.getPoliciesCount()); + for (int i = 0; i < s.getPoliciesCount(); ++i) { + ecPolicies.add(PBHelperClient.convertErasureCodingPolicy( + s.getPolicies(i))); + } + 
fsn.getErasureCodingPolicyManager().loadPolicies(ecPolicies); + } } public static final class Saver { @@ -497,7 +519,13 @@ private void saveInternal(FileOutputStream fout, // depends on this behavior. context.checkCancelled(); - Step step = new Step(StepType.INODES, filePath); + // Erasure coding policies should be saved before inodes + Step step = new Step(StepType.ERASURE_CODING_POLICIES, filePath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + saveErasureCodingSection(b); + prog.endStep(Phase.SAVING_CHECKPOINT, step); + + step = new Step(StepType.INODES, filePath); prog.beginStep(Phase.SAVING_CHECKPOINT, step); saveInodes(b); saveSnapshots(b); @@ -555,6 +583,23 @@ private void saveCacheManagerSection(FileSummary.Builder summary) commitSection(summary, SectionName.CACHE_MANAGER); } + private void saveErasureCodingSection( + FileSummary.Builder summary) throws IOException { + final FSNamesystem fsn = context.getSourceNamesystem(); + ErasureCodingPolicy[] ecPolicies = + fsn.getErasureCodingPolicyManager().getPolicies(); + ArrayList ecPolicyProtoes = + new ArrayList(); + for (ErasureCodingPolicy p : ecPolicies) { + ecPolicyProtoes.add(PBHelperClient.convertErasureCodingPolicy(p)); + } + + ErasureCodingSection section = ErasureCodingSection.newBuilder(). + addAllPolicies(ecPolicyProtoes).build(); + section.writeDelimitedTo(sectionOutputStream); + commitSection(summary, SectionName.ERASURE_CODING); + } + private void saveNameSystemSection(FileSummary.Builder summary) throws IOException { final FSNamesystem fsn = context.getSourceNamesystem(); @@ -606,6 +651,7 @@ public enum SectionName { NS_INFO("NS_INFO"), STRING_TABLE("STRING_TABLE"), EXTENDED_ACL("EXTENDED_ACL"), + ERASURE_CODING("ERASURE_CODING"), INODE("INODE"), INODE_REFERENCE("INODE_REFERENCE"), SNAPSHOT("SNAPSHOT"), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java index fd2700bf114..4d8b627cb0e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java @@ -21,6 +21,8 @@ import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -33,6 +35,7 @@ import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.LayoutVersion; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat; @@ -46,6 +49,7 @@ import org.apache.hadoop.io.ShortWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.io.erasurecode.ECSchema; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -750,4 +754,45 @@ public static CachePoolInfo readCachePoolInfo(Stanza st) return info; } + public static void writeErasureCodingPolicy(DataOutputStream out, + ErasureCodingPolicy ecPolicy) throws IOException { + 
writeString(ecPolicy.getSchema().getCodecName(), out); + writeInt(ecPolicy.getNumDataUnits(), out); + writeInt(ecPolicy.getNumParityUnits(), out); + writeInt(ecPolicy.getCellSize(), out); + + Map extraOptions = ecPolicy.getSchema().getExtraOptions(); + if (extraOptions == null || extraOptions.isEmpty()) { + writeInt(0, out); + return; + } + + writeInt(extraOptions.size(), out); + for (Map.Entry entry : extraOptions.entrySet()) { + writeString(entry.getKey(), out); + writeString(entry.getValue(), out); + } + } + + public static ErasureCodingPolicy readErasureCodingPolicy(DataInput in) + throws IOException { + String codecName = readString(in); + int numDataUnits = readInt(in); + int numParityUnits = readInt(in); + int cellSize = readInt(in); + + int size = readInt(in); + Map extraOptions = new HashMap<>(size); + + if (size != 0) { + for (int i = 0; i < size; i++) { + String key = readString(in); + String value = readString(in); + extraOptions.put(key, value); + } + } + ECSchema ecSchema = new ECSchema(codecName, numDataUnits, + numParityUnits, extraOptions); + return new ErasureCodingPolicy(ecSchema, cellSize); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index c30999b8f49..029557f2485 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -89,8 +89,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*; -import org.apache.hadoop.hdfs.protocol.BlocksStats; -import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats; +import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; +import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats; import org.apache.hadoop.hdfs.protocol.OpenFileEntry; import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus; import org.apache.hadoop.hdfs.server.namenode.metrics.ReplicatedBlocksMBean; @@ -4080,10 +4080,10 @@ long[] getStats() { * Get statistics pertaining to blocks of type {@link BlockType#CONTIGUOUS} * in the filesystem. *

- * @see ClientProtocol#getBlocksStats() + * @see ClientProtocol#getReplicatedBlockStats() */ - BlocksStats getBlocksStats() { - return new BlocksStats(getLowRedundancyReplicatedBlocks(), + ReplicatedBlockStats getReplicatedBlockStats() { + return new ReplicatedBlockStats(getLowRedundancyReplicatedBlocks(), getCorruptReplicatedBlocks(), getMissingReplicatedBlocks(), getMissingReplicationOneBlocks(), getBytesInFutureReplicatedBlocks(), getPendingDeletionReplicatedBlocks()); @@ -4093,12 +4093,12 @@ BlocksStats getBlocksStats() { * Get statistics pertaining to blocks of type {@link BlockType#STRIPED} * in the filesystem. *

- * @see ClientProtocol#getECBlockGroupsStats() + * @see ClientProtocol#getECBlockGroupStats() */ - ECBlockGroupsStats getECBlockGroupsStats() { - return new ECBlockGroupsStats(getLowRedundancyECBlockGroups(), + ECBlockGroupStats getECBlockGroupStats() { + return new ECBlockGroupStats(getLowRedundancyECBlockGroups(), getCorruptECBlockGroups(), getMissingECBlockGroups(), - getBytesInFutureECBlockGroups(), getPendingDeletionECBlockGroups()); + getBytesInFutureECBlockGroups(), getPendingDeletionECBlocks()); } @Override // FSNamesystemMBean @@ -4711,10 +4711,10 @@ public long getBytesInFutureECBlockGroups() { } @Override // ECBlockGroupsMBean - @Metric({"PendingDeletionECBlockGroups", "Number of erasure coded block " + - "groups that are pending deletion"}) - public long getPendingDeletionECBlockGroups() { - return blockManager.getPendingDeletionECBlockGroups(); + @Metric({"PendingDeletionECBlocks", "Number of erasure coded blocks " + + "that are pending deletion"}) + public long getPendingDeletionECBlocks() { + return blockManager.getPendingDeletionECBlocks(); } @Override @@ -4993,6 +4993,7 @@ private long nextBlockId(BlockType blockType) throws IOException { } boolean isFileDeleted(INodeFile file) { + assert hasReadLock(); // Not in the inodeMap or in the snapshot but marked deleted. if (dir.getInode(file.getId()) == null) { return true; @@ -7110,6 +7111,8 @@ private void reencryptEncryptionZoneInt(final String zone, if (keyVersionName == null) { throw new IOException("Failed to get key version name for " + zone); } + LOG.info("Re-encryption using key version " + keyVersionName + + " for zone " + zone); } writeLock(); try { @@ -7186,10 +7189,12 @@ void setErasureCodingPolicy(final String srcArg, final String ecPolicyName, /** * Add multiple erasure coding policies to the ErasureCodingPolicyManager. * @param policies The policies to add. + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding * @return The according result of add operation. */ - AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies) - throws IOException { + AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies, + final boolean logRetryCache) throws IOException { final String operationName = "addErasureCodingPolicies"; String addECPolicyName = ""; checkOperation(OperationCategory.WRITE); @@ -7198,12 +7203,12 @@ AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies) writeLock(); try { checkOperation(OperationCategory.WRITE); + checkNameNodeSafeMode("Cannot add erasure coding policy"); for (ErasureCodingPolicy policy : policies) { try { - checkOperation(OperationCategory.WRITE); - checkNameNodeSafeMode("Cannot add erasure coding policy"); ErasureCodingPolicy newPolicy = - FSDirErasureCodingOp.addErasureCodePolicy(this, policy); + FSDirErasureCodingOp.addErasureCodingPolicy(this, policy, + logRetryCache); addECPolicyName = newPolicy.getName(); responses.add(new AddECPolicyResponse(newPolicy)); } catch (HadoopIllegalArgumentException e) { @@ -7224,9 +7229,12 @@ AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies) /** * Remove an erasure coding policy. 
* @param ecPolicyName the name of the policy to be removed + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding * @throws IOException */ - void removeErasureCodingPolicy(String ecPolicyName) throws IOException { + void removeErasureCodingPolicy(String ecPolicyName, + final boolean logRetryCache) throws IOException { final String operationName = "removeErasureCodingPolicy"; checkOperation(OperationCategory.WRITE); boolean success = false; @@ -7235,23 +7243,27 @@ void removeErasureCodingPolicy(String ecPolicyName) throws IOException { checkOperation(OperationCategory.WRITE); checkNameNodeSafeMode("Cannot remove erasure coding policy " + ecPolicyName); - FSDirErasureCodingOp.removeErasureCodePolicy(this, ecPolicyName); + FSDirErasureCodingOp.removeErasureCodingPolicy(this, ecPolicyName, + logRetryCache); success = true; } finally { writeUnlock(operationName); if (success) { getEditLog().logSync(); } - logAuditEvent(success, operationName, null, null, null); + logAuditEvent(success, operationName, ecPolicyName, null, null); } } /** * Enable an erasure coding policy. * @param ecPolicyName the name of the policy to be enabled + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding * @throws IOException */ - void enableErasureCodingPolicy(String ecPolicyName) throws IOException { + void enableErasureCodingPolicy(String ecPolicyName, + final boolean logRetryCache) throws IOException { final String operationName = "enableErasureCodingPolicy"; checkOperation(OperationCategory.WRITE); boolean success = false; @@ -7261,7 +7273,8 @@ void enableErasureCodingPolicy(String ecPolicyName) throws IOException { checkOperation(OperationCategory.WRITE); checkNameNodeSafeMode("Cannot enable erasure coding policy " + ecPolicyName); - FSDirErasureCodingOp.enableErasureCodePolicy(this, ecPolicyName); + FSDirErasureCodingOp.enableErasureCodingPolicy(this, ecPolicyName, + logRetryCache); success = true; } finally { writeUnlock(operationName); @@ -7275,9 +7288,12 @@ void enableErasureCodingPolicy(String ecPolicyName) throws IOException { /** * Disable an erasure coding policy. 
* @param ecPolicyName the name of the policy to be disabled + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding * @throws IOException */ - void disableErasureCodingPolicy(String ecPolicyName) throws IOException { + void disableErasureCodingPolicy(String ecPolicyName, + final boolean logRetryCache) throws IOException { final String operationName = "disableErasureCodingPolicy"; checkOperation(OperationCategory.WRITE); boolean success = false; @@ -7287,7 +7303,8 @@ void disableErasureCodingPolicy(String ecPolicyName) throws IOException { checkOperation(OperationCategory.WRITE); checkNameNodeSafeMode("Cannot disable erasure coding policy " + ecPolicyName); - FSDirErasureCodingOp.disableErasureCodePolicy(this, ecPolicyName); + FSDirErasureCodingOp.disableErasureCodingPolicy(this, ecPolicyName, + logRetryCache); success = true; } finally { writeUnlock(operationName); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index 35ec063a1b2..45699cb4412 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -557,6 +557,12 @@ synchronized boolean checkLeases() { if (!p.startsWith("/")) { throw new IOException("Invalid path in the lease " + p); } + final INodeFile lastINode = iip.getLastINode().asFile(); + if (fsnamesystem.isFileDeleted(lastINode)) { + // INode referred by the lease could have been deleted. + removeLease(lastINode.getId()); + continue; + } boolean completed = false; try { completed = fsnamesystem.internalReleaseLease( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 3fbb7bd996c..d62c0f2dcc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -98,7 +98,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; -import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats; +import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.EncryptionZone; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -116,7 +116,7 @@ import org.apache.hadoop.hdfs.protocol.QuotaByStorageTypeExceededException; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; -import org.apache.hadoop.hdfs.protocol.BlocksStats; +import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus; import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; @@ -517,10 +517,10 @@ public NameNodeRpcServer(Configuration conf, NameNode nn) QuotaByStorageTypeExceededException.class, AclException.class, FSLimitException.PathComponentTooLongException.class, - 
FSLimitException.MaxDirectoryItemsExceededException.class, - UnresolvedPathException.class); + FSLimitException.MaxDirectoryItemsExceededException.class); - clientRpcServer.addSuppressedLoggingExceptions(StandbyException.class); + clientRpcServer.addSuppressedLoggingExceptions(StandbyException.class, + UnresolvedPathException.class); clientRpcServer.setTracer(nn.tracer); if (serviceRpcServer != null) { @@ -1163,17 +1163,17 @@ public long[] getStats() throws IOException { } @Override // ClientProtocol - public BlocksStats getBlocksStats() throws IOException { + public ReplicatedBlockStats getReplicatedBlockStats() throws IOException { checkNNStartup(); namesystem.checkOperation(OperationCategory.READ); - return namesystem.getBlocksStats(); + return namesystem.getReplicatedBlockStats(); } @Override // ClientProtocol - public ECBlockGroupsStats getECBlockGroupsStats() throws IOException { + public ECBlockGroupStats getECBlockGroupStats() throws IOException { checkNNStartup(); namesystem.checkOperation(OperationCategory.READ); - return namesystem.getECBlockGroupsStats(); + return namesystem.getECBlockGroupStats(); } @Override // ClientProtocol @@ -2337,7 +2337,21 @@ public AddECPolicyResponse[] addErasureCodingPolicies( ErasureCodingPolicy[] policies) throws IOException { checkNNStartup(); namesystem.checkSuperuserPrivilege(); - return namesystem.addErasureCodingPolicies(policies); + final CacheEntryWithPayload cacheEntry = + RetryCache.waitForCompletion(retryCache, null); + if (cacheEntry != null && cacheEntry.isSuccess()) { + return (AddECPolicyResponse[]) cacheEntry.getPayload(); + } + boolean success = false; + AddECPolicyResponse[] responses = new AddECPolicyResponse[0]; + try { + responses = + namesystem.addErasureCodingPolicies(policies, cacheEntry != null); + success = true; + } finally { + RetryCache.setState(cacheEntry, success, responses); + } + return responses; } @Override @@ -2345,7 +2359,17 @@ public void removeErasureCodingPolicy(String ecPolicyName) throws IOException { checkNNStartup(); namesystem.checkSuperuserPrivilege(); - namesystem.removeErasureCodingPolicy(ecPolicyName); + final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); + if (cacheEntry != null && cacheEntry.isSuccess()) { + return; + } + boolean success = false; + try { + namesystem.removeErasureCodingPolicy(ecPolicyName, cacheEntry != null); + success = true; + } finally { + RetryCache.setState(cacheEntry, success); + } } @Override // ClientProtocol @@ -2353,7 +2377,17 @@ public void enableErasureCodingPolicy(String ecPolicyName) throws IOException { checkNNStartup(); namesystem.checkSuperuserPrivilege(); - namesystem.enableErasureCodingPolicy(ecPolicyName); + final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); + if (cacheEntry != null && cacheEntry.isSuccess()) { + return; + } + boolean success = false; + try { + namesystem.enableErasureCodingPolicy(ecPolicyName, cacheEntry != null); + success = true; + } finally { + RetryCache.setState(cacheEntry, success); + } } @Override // ClientProtocol @@ -2361,7 +2395,17 @@ public void disableErasureCodingPolicy(String ecPolicyName) throws IOException { checkNNStartup(); namesystem.checkSuperuserPrivilege(); - namesystem.disableErasureCodingPolicy(ecPolicyName); + final CacheEntry cacheEntry = RetryCache.waitForCompletion(retryCache); + if (cacheEntry != null && cacheEntry.isSuccess()) { + return; + } + boolean success = false; + try { + namesystem.disableErasureCodingPolicy(ecPolicyName, cacheEntry != null); + success = true; + } 
finally { + RetryCache.setState(cacheEntry, success); + } } @Override // ReconfigurationProtocol diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ECBlockGroupsMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ECBlockGroupsMBean.java index 5fa646a6c44..474f3edb975 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ECBlockGroupsMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ECBlockGroupsMBean.java @@ -53,7 +53,7 @@ public interface ECBlockGroupsMBean { long getBytesInFutureECBlockGroups(); /** - * Return count of erasure coded block groups that are pending deletion. + * Return count of erasure coded blocks that are pending deletion. */ - long getPendingDeletionECBlockGroups(); + long getPendingDeletionECBlocks(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StepType.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StepType.java index 1b43d6a2b09..83cf6cffac0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StepType.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/startupprogress/StepType.java @@ -52,7 +52,12 @@ public enum StepType { /** * The namenode is performing an operation related to cache entries. */ - CACHE_ENTRIES("CacheEntries", "cache entries"); + CACHE_ENTRIES("CacheEntries", "cache entries"), + + /** + * The namenode is performing an operation related to erasure coding policies. + */ + ERASURE_CODING_POLICIES("ErasureCodingPolicies", "erasure coding policies"); private final String name, description; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 139680c672d..e4008479fae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -54,7 +54,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -993,21 +992,6 @@ private Response get( return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); } } - case GETFILEBLOCKLOCATIONS: - { - final long offsetValue = offset.getValue(); - final Long lengthValue = length.getValue(); - - FileSystem fs = FileSystem.get(conf != null ? - conf : new Configuration()); - BlockLocation[] locations = fs.getFileBlockLocations( - new org.apache.hadoop.fs.Path(fullpath), - offsetValue, - lengthValue != null? 
lengthValue: Long.MAX_VALUE); - final String js = JsonUtil.toJsonString("BlockLocations", - JsonUtil.toJsonMap(locations)); - return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); - } case GET_BLOCK_LOCATIONS: { final long offsetValue = offset.getValue(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index 88aafe270e0..9805972bdfe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -66,13 +66,13 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo; -import org.apache.hadoop.hdfs.protocol.BlocksStats; +import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo; import org.apache.hadoop.hdfs.protocol.DatanodeVolumeInfo; -import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats; +import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction; @@ -534,30 +534,31 @@ public void report(String[] argv, int i) throws IOException { * minutes. Use "-metaSave" to list of all such blocks and accurate * counts. */ - BlocksStats blocksStats = dfs.getClient().getNamenode().getBlocksStats(); + ReplicatedBlockStats replicatedBlockStats = + dfs.getClient().getNamenode().getReplicatedBlockStats(); System.out.println("Replicated Blocks:"); System.out.println("\tUnder replicated blocks: " + - blocksStats.getLowRedundancyBlocksStat()); + replicatedBlockStats.getLowRedundancyBlocks()); System.out.println("\tBlocks with corrupt replicas: " + - blocksStats.getCorruptBlocksStat()); + replicatedBlockStats.getCorruptBlocks()); System.out.println("\tMissing blocks: " + - blocksStats.getMissingReplicaBlocksStat()); + replicatedBlockStats.getMissingReplicaBlocks()); System.out.println("\tMissing blocks (with replication factor 1): " + - blocksStats.getMissingReplicationOneBlocksStat()); + replicatedBlockStats.getMissingReplicationOneBlocks()); System.out.println("\tPending deletion blocks: " + - blocksStats.getPendingDeletionBlocksStat()); + replicatedBlockStats.getPendingDeletionBlocks()); - ECBlockGroupsStats ecBlockGroupsStats = - dfs.getClient().getNamenode().getECBlockGroupsStats(); + ECBlockGroupStats ecBlockGroupStats = + dfs.getClient().getNamenode().getECBlockGroupStats(); System.out.println("Erasure Coded Block Groups: "); System.out.println("\tLow redundancy block groups: " + - ecBlockGroupsStats.getLowRedundancyBlockGroupsStat()); + ecBlockGroupStats.getLowRedundancyBlockGroups()); System.out.println("\tBlock groups with corrupt internal blocks: " + - ecBlockGroupsStats.getCorruptBlockGroupsStat()); + ecBlockGroupStats.getCorruptBlockGroups()); System.out.println("\tMissing block groups: " + - ecBlockGroupsStats.getMissingBlockGroupsStat()); - System.out.println("\tPending deletion block groups: " + - ecBlockGroupsStats.getPendingDeletionBlockGroupsStat()); + 
ecBlockGroupStats.getMissingBlockGroups()); + System.out.println("\tPending deletion blocks: " + + ecBlockGroupStats.getPendingDeletionBlocks()); System.out.println(); @@ -2243,7 +2244,7 @@ public int run(String[] argv) throws Exception { System.err.println(cmd.substring(1) + ": " + e.getLocalizedMessage()); } - if (LOG.isDebugEnabled()) { + if (LOG.isDebugEnabled() && debugException != null) { LOG.debug("Exception encountered:", debugException); } return exitCode; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 94752f53576..5752948686d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -470,37 +470,4 @@ private static Object toJsonMap(BlockStoragePolicy blockStoragePolicy) { public static String toJsonString(BlockStoragePolicy storagePolicy) { return toJsonString(BlockStoragePolicy.class, toJsonMap(storagePolicy)); } - - public static Map toJsonMap(BlockLocation[] locations) - throws IOException { - if(locations == null) { - return null; - } - final Map m = new TreeMap(); - Object[] blockLocations = new Object[locations.length]; - for(int i=0; i toJsonMap( - final BlockLocation blockLocation) throws IOException { - if (blockLocation == null) { - return null; - } - - final Map m = new TreeMap(); - m.put("length", blockLocation.getLength()); - m.put("offset", blockLocation.getOffset()); - m.put("corrupt", blockLocation.isCorrupt()); - m.put("storageTypes", toJsonArray(blockLocation.getStorageTypes())); - m.put("storageIds", blockLocation.getStorageIds()); - m.put("cachedHosts", blockLocation.getCachedHosts()); - m.put("hosts", blockLocation.getHosts()); - m.put("names", blockLocation.getNames()); - m.put("topologyPaths", blockLocation.getTopologyPaths()); - return m; - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto index 4e21310d2a3..101a0605acb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto @@ -346,3 +346,7 @@ message CacheManagerSection { // repeated CachePoolInfoProto pools // repeated CacheDirectiveInfoProto directives } + +message ErasureCodingSection { + repeated ErasureCodingPolicyProto policies = 1; +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 36c74f6d46e..af40a34ba78 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -3017,16 +3017,6 @@ - - dfs.namenode.ec.policies.enabled - - Comma-delimited list of enabled erasure coding policies. - The NameNode will enforce this when setting an erasure coding policy - on a directory. By default, none of the built-in erasure coding - policies are enabled. - - - dfs.namenode.ec.system.default.policy RS-6-3-1024k @@ -3049,15 +3039,6 @@ - - dfs.datanode.ec.reconstruction.stripedread.threads - 20 - - Number of threads used by the Datanode to read striped block - during background reconstruction work. 
- - - dfs.datanode.ec.reconstruction.stripedread.buffer.size 65536 @@ -3066,7 +3047,7 @@ - dfs.datanode.ec.reconstruction.stripedblock.threads.size + dfs.datanode.ec.reconstruction.threads 8 Number of threads used by the Datanode for background @@ -4160,11 +4141,11 @@ - dfs.namenode.authorization.provider.bypass.users + dfs.namenode.inode.attributes.provider.bypass.users A list of user principals (in secure cluster) or user names (in insecure - cluster) for whom the external attribute provider will be bypassed for all + cluster) for whom the external attributes provider will be bypassed for all operations. This means file attributes stored in HDFS instead of the external provider will be used for permission checking and be returned when requested. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js index b785274f4df..de62622e3d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js @@ -331,12 +331,12 @@ 'columns': [ { 'orderDataType': 'ng-value', 'searchable': true }, { 'orderDataType': 'ng-value', 'searchable': true }, - { 'orderDataType': 'ng-value', 'type': 'numeric' }, - { 'orderDataType': 'ng-value', 'type': 'numeric' }, - { 'orderDataType': 'ng-value', 'type': 'numeric' }, - { 'orderData': 3, 'type': 'numeric' }, - { 'orderDataType': 'ng-value', 'type': 'numeric'}, - { 'orderData': 5 } + { 'orderDataType': 'ng-value', 'type': 'num' }, + { 'orderDataType': 'ng-value', 'type': 'num' }, + { 'orderDataType': 'ng-value', 'type': 'num' }, + { 'type': 'num' }, + { 'orderDataType': 'ng-value', 'type': 'num'}, + { 'type': 'string' } ]}); renderHistogram(data); $('#ui-tabs a[href="#tab-datanode"]').tab('show'); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md index ddf474ac492..be36cc2de50 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md @@ -113,7 +113,7 @@ Here is an example configuration with two Namenodes: nn-host1:http-port - dfs.namenode.secondaryhttp-address.ns1 + dfs.namenode.secondary.http-address.ns1 snn-host1:http-port @@ -125,7 +125,7 @@ Here is an example configuration with two Namenodes: nn-host2:http-port - dfs.namenode.secondaryhttp-address.ns2 + dfs.namenode.secondary.http-address.ns2 snn-host2:http-port diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md index 5bd7c6d29ae..c8ef6c7e6c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSErasureCoding.md @@ -69,7 +69,7 @@ Architecture `REPLICATION` is a special policy. It can only be set on directory, to force the directory to adopt 3x replication scheme, instead of inheriting its ancestor's erasure coding policy. This policy makes it possible to interleave 3x replication scheme directory with erasure coding directory. - `REPLICATION` policy is always enabled. For other built-in policies, unless they are configured in `dfs.namenode.ec.policies.enabled` property, otherwise they are disabled by default. + `REPLICATION` policy is always enabled. For other built-in policies, they are disabled by default. 
Similar to HDFS storage policies, erasure coding policies are set on a directory. When a file is created, it inherits the EC policy of its nearest ancestor directory. @@ -110,11 +110,8 @@ Deployment ### Configuration keys - The set of enabled erasure coding policies can be configured on the NameNode via `dfs.namenode.ec.policies.enabled` configuration. This restricts - what EC policies can be set by clients. It does not affect the behavior of already set file or directory-level EC policies. - - By default, all built-in erasure coding policies are disabled. Typically, the cluster administrator will enable set of policies by including them - in the `dfs.namenode.ec.policies.enabled` configuration based on the size of the cluster and the desired fault-tolerance properties. For instance, + By default, all built-in erasure coding policies are disabled, except the one defined in `dfs.namenode.ec.system.default.policy` which is enabled by default. + The cluster administrator can enable set of policies through `hdfs ec [-enablePolicy -policy ]` command based on the size of the cluster and the desired fault-tolerance properties. For instance, for a cluster with 9 racks, a policy like `RS-10-4-1024k` will not preserve rack-level fault-tolerance, and `RS-6-3-1024k` or `RS-3-2-1024k` might be more appropriate. If the administrator only cares about node-level fault-tolerance, `RS-10-4-1024k` would still be appropriate as long as there are at least 14 DataNodes in the cluster. @@ -137,9 +134,8 @@ Deployment Erasure coding background recovery work on the DataNodes can also be tuned via the following configuration parameters: 1. `dfs.datanode.ec.reconstruction.stripedread.timeout.millis` - Timeout for striped reads. Default value is 5000 ms. - 1. `dfs.datanode.ec.reconstruction.stripedread.threads` - Number of concurrent reader threads. Default value is 20 threads. 1. `dfs.datanode.ec.reconstruction.stripedread.buffer.size` - Buffer size for reader service. Default value is 64KB. - 1. `dfs.datanode.ec.reconstruction.stripedblock.threads.size` - Number of threads used by the Datanode for background reconstruction work. Default value is 8 threads. + 1. `dfs.datanode.ec.reconstruction.threads` - Number of threads used by the Datanode for background reconstruction work. Default value is 8 threads. ### Enable Intel ISA-L diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsEditsViewer.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsEditsViewer.md index 4ab07ce2143..ce798b72186 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsEditsViewer.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsEditsViewer.md @@ -108,6 +108,10 @@ The output result of this processor should be like the following output: ...some output omitted... OP_APPEND ( 47): 1 OP_SET_QUOTA_BY_STORAGETYPE ( 48): 1 + OP_ADD_ERASURE_CODING_POLICY ( 49): 0 + OP_ENABLE_ERASURE_CODING_POLICY ( 50): 1 + OP_DISABLE_ERASURE_CODING_POLICY ( 51): 0 + OP_REMOVE_ERASURE_CODING_POLICY ( 52): 0 OP_INVALID ( -1): 0 The output is formatted as a colon separated two column table: OpCode and OpCodeCount. Each OpCode corresponding to the specific operation(s) in NameNode. 
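The HDFSErasureCoding.md and HdfsEditsViewer.md changes above describe the new runtime flow: built-in policies start out disabled, the administrator enables one with the `hdfs ec -enablePolicy` command, and the NameNode now persists that change as an `OP_ENABLE_ERASURE_CODING_POLICY` edit. The sketch below shows the equivalent call path from a Java client. It is illustrative only: it assumes `fs.defaultFS` points at an HDFS cluster and that the `DistributedFileSystem` admin methods of this branch (`enableErasureCodingPolicy`, `setErasureCodingPolicy`) are on the classpath; the class name and paths are placeholders.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class EnableEcPolicySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    try (FileSystem fs = FileSystem.get(conf)) {
      // Assumes fs.defaultFS is an hdfs:// URI, so this cast is safe.
      DistributedFileSystem dfs = (DistributedFileSystem) fs;

      // Superuser-only call; with this patch the NameNode records it as
      // OP_ENABLE_ERASURE_CODING_POLICY so the setting survives a restart.
      dfs.enableErasureCodingPolicy("RS-6-3-1024k");

      // Policies are applied per directory; files created underneath
      // inherit the policy of their nearest ancestor directory.
      Path dir = new Path("/data/ec-rs-6-3");
      dfs.mkdirs(dir);
      dfs.setErasureCodingPolicy(dir, "RS-6-3-1024k");
    }
  }
}
```

Disabling follows the same shape through `disableErasureCodingPolicy`, which the edit-log loader above replays as `OP_DISABLE_ERASURE_CODING_POLICY`.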
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md index 03834ebf07d..84e8a576b00 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md @@ -50,7 +50,6 @@ The HTTP REST API supports the complete [FileSystem](../../api/org/apache/hadoop * [`CHECKACCESS`](#Check_access) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).access) * [`GETALLSTORAGEPOLICY`](#Get_all_Storage_Policies) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getAllStoragePolicies) * [`GETSTORAGEPOLICY`](#Get_Storage_Policy) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getStoragePolicy) - * [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileBlockLocations) * HTTP PUT * [`CREATE`](#Create_and_Write_to_a_File) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).create) * [`MKDIRS`](#Make_a_Directory) (see [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).mkdirs) @@ -1069,7 +1068,7 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).unsetStor { "BlockStoragePolicy": { "copyOnCreateFile": false, - "creationFallbacks": [], + "creationFallbacks": [], "id":7, "name":"HOT", "replicationFallbacks":["ARCHIVE"], @@ -1079,51 +1078,6 @@ See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).unsetStor See also: [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getStoragePolicy -### Get File Block Locations - -* Submit a HTTP GET request. - - curl -i "http://:/webhdfs/v1/?op=GETFILEBLOCKLOCATIONS - - The client receives a response with a [`BlockLocations` JSON Object](#Block_Locations_JSON_Schema): - - HTTP/1.1 200 OK - Content-Type: application/json - Transfer-Encoding: chunked - - { - "BlockLocations" : - { - "BlockLocation": - [ - { - "cachedHosts" : [], - "corrupt" : false, - "hosts" : ["host"], - "length" : 134217728, // length of this block - "names" : ["host:ip"], - "offset" : 0, // offset of the block in the file - "storageIds" : ["storageid"], - "storageTypes" : ["DISK"], // enum {RAM_DISK, SSD, DISK, ARCHIVE} - "topologyPaths" : ["/default-rack/hostname:ip"] - }, { - "cachedHosts" : [], - "corrupt" : false, - "hosts" : ["host"], - "length" : 62599364, - "names" : ["host:ip"], - "offset" : 134217728, - "storageIds" : ["storageid"], - "storageTypes" : ["DISK"], - "topologyPaths" : ["/default-rack/hostname:ip"] - }, - ... - ] - } - } - -See also: [`offset`](#Offset), [`length`](#Length), [FileSystem](../../api/org/apache/hadoop/fs/FileSystem.html).getFileBlockLocations - Extended Attributes(XAttrs) Operations -------------------------------------- @@ -2082,146 +2036,6 @@ A `BlockStoragePolicies` JSON object represents an array of `BlockStoragePolicy` } ``` -#### BlockLocations JSON Schema - -A `BlockLocations` JSON object represents an array of `BlockLocation` JSON objects. 
- -```json -{ - "name" : "BlockLocations", - "properties": - { - "BlockLocations": - { - "type" : "object", - "properties": - { - "BlockLocation": - { - "description": "An array of BlockLocation", - "type" : "array", - "items" : blockLocationProperties //See BlockLocation Properties - } - } - } - } -} -``` - -See also [`BlockLocation` Properties](#BlockLocation_Properties), [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations), [BlockLocation](../../api/org/apache/hadoop/fs/BlockLocation.html) - -### BlockLocation JSON Schema - -```json -{ - "name" : "BlockLocation", - "properties": - { - "BlockLocation": blockLocationProperties //See BlockLocation Properties - } -} -``` - -See also [`BlockLocation` Properties](#BlockLocation_Properties), [`GETFILEBLOCKLOCATIONS`](#Get_File_Block_Locations), [BlockLocation](../../api/org/apache/hadoop/fs/BlockLocation.html) - -#### BlockLocation Properties - -JavaScript syntax is used to define `blockLocationProperties` so that it can be referred in both `BlockLocation` and `BlockLocations` JSON schemas. - -```javascript -var blockLocationProperties = -{ - "type" : "object", - "properties": - { - "cachedHosts": - { - "description": "Datanode hostnames with a cached replica", - "type" : "array", - "required" : "true", - "items" : - { - "description": "A datanode hostname", - "type" : "string" - } - }, - "corrupt": - { - "description": "True if the block is corrupted", - "type" : "boolean", - "required" : "true" - }, - "hosts": - { - "description": "Datanode hostnames store the block", - "type" : "array", - "required" : "true", - "items" : - { - "description": "A datanode hostname", - "type" : "string" - } - }, - "length": - { - "description": "Length of the block", - "type" : "integer", - "required" : "true" - }, - "names": - { - "description": "Datanode IP:xferPort for accessing the block", - "type" : "array", - "required" : "true", - "items" : - { - "description": "DatanodeIP:xferPort", - "type" : "string" - } - }, - "offset": - { - "description": "Offset of the block in the file", - "type" : "integer", - "required" : "true" - }, - "storageIds": - { - "description": "Storage ID of each replica", - "type" : "array", - "required" : "true", - "items" : - { - "description": "Storage ID", - "type" : "string" - } - }, - "storageTypes": - { - "description": "Storage type of each replica", - "type" : "array", - "required" : "true", - "items" : - { - "description": "Storage type", - "enum" : ["RAM_DISK", "SSD", "DISK", "ARCHIVE"] - } - }, - "topologyPaths": - { - "description": "Datanode addresses in network topology", - "type" : "array", - "required" : "true", - "items" : - { - "description": "/rack/host:ip", - "type" : "string" - } - } - } -}; -``` - HTTP Query Parameter Dictionary ------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestErasureCodingCLI.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestErasureCodingCLI.java index 60f4f561a12..566755db996 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestErasureCodingCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestErasureCodingCLI.java @@ -21,8 +21,8 @@ import org.apache.hadoop.cli.util.CLICommand; import org.apache.hadoop.cli.util.CLICommandErasureCodingCli; import org.apache.hadoop.cli.util.CommandExecutor.Result; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DistributedFileSystem; 
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.junit.After; import org.junit.Before; @@ -34,7 +34,7 @@ public class TestErasureCodingCLI extends CLITestHelper { private final int NUM_OF_DATANODES = 3; private MiniDFSCluster dfsCluster = null; - private FileSystem fs = null; + private DistributedFileSystem fs = null; private String namenode = null; @Rule @@ -44,10 +44,6 @@ public class TestErasureCodingCLI extends CLITestHelper { @Override public void setUp() throws Exception { super.setUp(); - - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - "RS-6-3-1024k,RS-3-2-1024k,XOR-2-1-1024k"); - dfsCluster = new MiniDFSCluster.Builder(conf) .numDataNodes(NUM_OF_DATANODES).build(); dfsCluster.waitClusterUp(); @@ -56,6 +52,9 @@ public void setUp() throws Exception { username = System.getProperty("user.name"); fs = dfsCluster.getFileSystem(); + fs.enableErasureCodingPolicy("RS-6-3-1024k"); + fs.enableErasureCodingPolicy("RS-3-2-1024k"); + fs.enableErasureCodingPolicy("XOR-2-1-1024k"); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUnbuffer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUnbuffer.java index e25a7548852..b112e306c82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUnbuffer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUnbuffer.java @@ -27,12 +27,18 @@ import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.io.IOUtils; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.mockito.Mockito; public class TestUnbuffer { private static final Log LOG = LogFactory.getLog(TestUnbuffer.class.getName()); + @Rule + public ExpectedException exception = ExpectedException.none(); + /** * Test that calling Unbuffer closes sockets. */ @@ -123,4 +129,19 @@ public void testOpenManyFilesViaTcp() throws Exception { } } } + + /** + * Test unbuffer method which throws an Exception with class name included. 
+ */ + @Test + public void testUnbufferException() { + FSInputStream in = Mockito.mock(FSInputStream.class); + FSDataInputStream fs = new FSDataInputStream(in); + + exception.expect(UnsupportedOperationException.class); + exception.expectMessage("this stream " + in.getClass().getName() + + " does not support unbuffering"); + + fs.unbuffer(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index f3572ff1b7c..c6fe1a23b71 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -71,7 +71,6 @@ import java.util.UUID; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.stream.Collectors; import com.google.common.base.Charsets; import com.google.common.base.Joiner; @@ -109,6 +108,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; +import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; @@ -117,8 +117,8 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder; -import org.apache.hadoop.hdfs.protocol.ECBlockGroupsStats; -import org.apache.hadoop.hdfs.protocol.BlocksStats; +import org.apache.hadoop.hdfs.protocol.ECBlockGroupStats; +import org.apache.hadoop.hdfs.protocol.ReplicatedBlockStats; import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -164,6 +164,8 @@ import org.apache.hadoop.hdfs.tools.JMXGet; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.io.erasurecode.ErasureCodeConstants; import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.unix.DomainSocket; @@ -289,12 +291,13 @@ public static void setEditLogForTesting(FSNamesystem fsn, FSEditLog newLog) { Whitebox.setInternalState(fsn.getFSDirectory(), "editLog", newLog); } - public static void enableAllECPolicies(Configuration conf) { - // Enable all the available EC policies - String policies = SystemErasureCodingPolicies.getPolicies().stream() - .map(ErasureCodingPolicy::getName) - .collect(Collectors.joining(",")); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, policies); + public static void enableAllECPolicies(DistributedFileSystem fs) + throws IOException { + // Enable all available EC policies + for (ErasureCodingPolicy ecPolicy : + SystemErasureCodingPolicies.getPolicies()) { + fs.enableErasureCodingPolicy(ecPolicy.getName()); + } } /** class MyFile contains enough information to recreate the contents of @@ -1464,6 +1467,33 @@ public static void runOperations(MiniDFSCluster cluster, new byte[]{0x37, 0x38, 0x39}); // OP_REMOVE_XATTR filesystem.removeXAttr(pathConcatTarget, "user.a2"); + + // OP_ADD_ERASURE_CODING_POLICY + ErasureCodingPolicy newPolicy1 = + new 
ErasureCodingPolicy(ErasureCodeConstants.RS_3_2_SCHEMA, 8 * 1024); + ErasureCodingPolicy[] policyArray = new ErasureCodingPolicy[] {newPolicy1}; + AddECPolicyResponse[] responses = + filesystem.addErasureCodingPolicies(policyArray); + newPolicy1 = responses[0].getPolicy(); + + // OP_ADD_ERASURE_CODING_POLICY - policy with extra options + Map extraOptions = new HashMap(); + extraOptions.put("dummyKey", "dummyValue"); + ECSchema schema = + new ECSchema(ErasureCodeConstants.RS_CODEC_NAME, 6, 10, extraOptions); + ErasureCodingPolicy newPolicy2 = new ErasureCodingPolicy(schema, 4 * 1024); + policyArray = new ErasureCodingPolicy[] {newPolicy2}; + responses = filesystem.addErasureCodingPolicies(policyArray); + newPolicy2 = responses[0].getPolicy(); + // OP_ENABLE_ERASURE_CODING_POLICY + filesystem.enableErasureCodingPolicy(newPolicy1.getName()); + filesystem.enableErasureCodingPolicy(newPolicy2.getName()); + // OP_DISABLE_ERASURE_CODING_POLICY + filesystem.disableErasureCodingPolicy(newPolicy1.getName()); + filesystem.disableErasureCodingPolicy(newPolicy2.getName()); + // OP_REMOVE_ERASURE_CODING_POLICY + filesystem.removeErasureCodingPolicy(newPolicy1.getName()); + filesystem.removeErasureCodingPolicy(newPolicy2.getName()); } public static void abortStream(DFSOutputStream out) throws IOException { @@ -1657,8 +1687,8 @@ public static boolean verifyFileReplicasOnStorageType(FileSystem fs, /** * Verify the aggregated {@link ClientProtocol#getStats()} block counts equal - * the sum of {@link ClientProtocol#getBlocksStats()} and - * {@link ClientProtocol#getECBlockGroupsStats()}. + * the sum of {@link ClientProtocol#getReplicatedBlockStats()} and + * {@link ClientProtocol#getECBlockGroupStats()}. * @throws Exception */ public static void verifyClientStats(Configuration conf, @@ -1667,36 +1697,36 @@ public static void verifyClientStats(Configuration conf, cluster.getFileSystem(0).getUri(), ClientProtocol.class).getProxy(); long[] aggregatedStats = cluster.getNameNode().getRpcServer().getStats(); - BlocksStats blocksStats = - client.getBlocksStats(); - ECBlockGroupsStats ecBlockGroupsStats = client.getECBlockGroupsStats(); + ReplicatedBlockStats replicatedBlockStats = + client.getReplicatedBlockStats(); + ECBlockGroupStats ecBlockGroupStats = client.getECBlockGroupStats(); assertEquals("Under replicated stats not matching!", aggregatedStats[ClientProtocol.GET_STATS_LOW_REDUNDANCY_IDX], aggregatedStats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX]); assertEquals("Low redundancy stats not matching!", aggregatedStats[ClientProtocol.GET_STATS_LOW_REDUNDANCY_IDX], - blocksStats.getLowRedundancyBlocksStat() + - ecBlockGroupsStats.getLowRedundancyBlockGroupsStat()); + replicatedBlockStats.getLowRedundancyBlocks() + + ecBlockGroupStats.getLowRedundancyBlockGroups()); assertEquals("Corrupt blocks stats not matching!", aggregatedStats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX], - blocksStats.getCorruptBlocksStat() + - ecBlockGroupsStats.getCorruptBlockGroupsStat()); + replicatedBlockStats.getCorruptBlocks() + + ecBlockGroupStats.getCorruptBlockGroups()); assertEquals("Missing blocks stats not matching!", aggregatedStats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX], - blocksStats.getMissingReplicaBlocksStat() + - ecBlockGroupsStats.getMissingBlockGroupsStat()); + replicatedBlockStats.getMissingReplicaBlocks() + + ecBlockGroupStats.getMissingBlockGroups()); assertEquals("Missing blocks with replication factor one not matching!", aggregatedStats[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX], - 
blocksStats.getMissingReplicationOneBlocksStat()); + replicatedBlockStats.getMissingReplicationOneBlocks()); assertEquals("Bytes in future blocks stats not matching!", aggregatedStats[ClientProtocol.GET_STATS_BYTES_IN_FUTURE_BLOCKS_IDX], - blocksStats.getBytesInFutureBlocksStat() + - ecBlockGroupsStats.getBytesInFutureBlockGroupsStat()); + replicatedBlockStats.getBytesInFutureBlocks() + + ecBlockGroupStats.getBytesInFutureBlockGroups()); assertEquals("Pending deletion blocks stats not matching!", aggregatedStats[ClientProtocol.GET_STATS_PENDING_DELETION_BLOCKS_IDX], - blocksStats.getPendingDeletionBlocksStat() + - ecBlockGroupsStats.getPendingDeletionBlockGroupsStat()); + replicatedBlockStats.getPendingDeletionBlocks() + + ecBlockGroupStats.getPendingDeletionBlocks()); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/ReadStripedFileWithDecodingHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/ReadStripedFileWithDecodingHelper.java index 4202969ee43..7057010663b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/ReadStripedFileWithDecodingHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/ReadStripedFileWithDecodingHelper.java @@ -81,11 +81,11 @@ public static MiniDFSCluster initializeCluster() throws IOException { 0); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); MiniDFSCluster myCluster = new MiniDFSCluster.Builder(conf) .numDataNodes(NUM_DATANODES) .build(); + myCluster.getFileSystem().enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); myCluster.getFileSystem().getClient().setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); return myCluster; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java index 97f34f29417..1b462a9a8ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java @@ -72,7 +72,7 @@ private static long checkTxid(EventBatch batch, long prevTxid){ */ @Test public void testOpcodeCount() { - Assert.assertEquals(50, FSEditLogOpCodes.values().length); + Assert.assertEquals(54, FSEditLogOpCodes.values().length); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStream.java index 4f67a0a1a29..f94b7abeee2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedInputStream.java @@ -93,8 +93,6 @@ public void setup() throws IOException { conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - getEcPolicy().getName()); if (ErasureCodeNative.isNativeCodeLoaded()) { conf.set( CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY, @@ -108,6 +106,7 @@ public void setup() throws 
IOException { DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true); } fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy(getEcPolicy().getName()); fs.mkdirs(dirPath); fs.getClient() .setErasureCodingPolicy(dirPath.toString(), ecPolicy.getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java index c0cfea22007..3714542411d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java @@ -94,11 +94,10 @@ public void setup() throws IOException { CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY, NativeRSRawErasureCoderFactory.CODER_NAME); } - DFSTestUtil.enableAllECPolicies(conf); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); - cluster.getFileSystem().getClient().setErasureCodingPolicy("/", ecPolicy - .getName()); fs = cluster.getFileSystem(); + DFSTestUtil.enableAllECPolicies(fs); + fs.getClient().setErasureCodingPolicy("/", ecPolicy.getName()); } @After diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java index f63a3538150..57da4399491 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStreamWithFailure.java @@ -42,6 +42,7 @@ import org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawErasureCoderFactory; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.StringUtils; import org.apache.log4j.Level; import org.junit.Assert; @@ -216,10 +217,10 @@ private void setup(Configuration conf) throws IOException { CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY, NativeRSRawErasureCoderFactory.CODER_NAME); } - DFSTestUtil.enableAllECPolicies(conf); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); cluster.waitActive(); dfs = cluster.getFileSystem(); + DFSTestUtil.enableAllECPolicies(dfs); dfs.mkdirs(dir); dfs.setErasureCodingPolicy(dir, ecPolicy.getName()); } @@ -282,7 +283,7 @@ public void testBlockTokenExpired() throws Exception { @Test(timeout = 90000) public void testAddBlockWhenNoSufficientDataBlockNumOfNodes() - throws IOException { + throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); try { @@ -301,20 +302,18 @@ public void testAddBlockWhenNoSufficientDataBlockNumOfNodes() DatanodeReportType.LIVE); assertEquals("Mismatches number of live Dns ", numDatanodes, info.length); final Path dirFile = new Path(dir, "ecfile"); - FSDataOutputStream out; - try { - out = dfs.create(dirFile, true); - out.write("something".getBytes()); - out.flush(); - out.close(); - Assert.fail("Failed to validate available dns against blkGroupSize"); - } catch (IOException ioe) { - // expected - GenericTestUtils.assertExceptionContains("Failed to get " + - dataBlocks + " nodes from namenode: blockGroupSize= " + - (dataBlocks + parityBlocks) + ", blocks.length= " + - numDatanodes, ioe); - } + 
LambdaTestUtils.intercept( + IOException.class, + "File " + dirFile + " could only be written to " + + numDatanodes + " of the " + dataBlocks + " required nodes for " + + getEcPolicy().getName(), + () -> { + try (FSDataOutputStream out = dfs.create(dirFile, true)) { + out.write("something".getBytes()); + out.flush(); + } + return 0; + }); } finally { tearDown(); } @@ -493,8 +492,8 @@ private void runTest(final int length, final int[] killPos, final BlockManager bm = nn.getNamesystem().getBlockManager(); final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager(); - // set a short token lifetime (1 second) - SecurityTestUtil.setBlockTokenLifetime(sm, 1000L); + // set a short token lifetime (6 second) + SecurityTestUtil.setBlockTokenLifetime(sm, 6000L); } final AtomicInteger pos = new AtomicInteger(); @@ -631,6 +630,8 @@ int getBase() { private void run(int offset) { int base = getBase(); + // TODO: Fix and re-enable these flaky tests. See HDFS-12417. + assumeTrue("Test has been temporarily disabled. See HDFS-12417.", false); assumeTrue(base >= 0); final int i = offset + base; final Integer length = getLength(i); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java index bb394feb7cd..7bd85b4989c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java @@ -131,8 +131,6 @@ public void setup() throws IOException { conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); numDNs = dataBlocks + parityBlocks + 2; cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); @@ -142,6 +140,8 @@ public void setup() throws IOException { bm = fsn.getBlockManager(); client = getDfsClient(cluster.getNameNode(0), conf); + dfs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); dfs.mkdirs(ecDir); dfs.setErasureCodingPolicy(ecDir, StripedFileTestUtil.getDefaultECPolicy().getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index 8e54e5f833b..987992e5a38 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -50,7 +50,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicReference; -import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.CommonConfigurationKeys; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java new file mode 100644 index 00000000000..d4e01b72d4e --- /dev/null +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertTrue; + +/** + * Testing correctness of FileSystem.getFileBlockLocations and + * FileSystem.listFiles for erasure coded files. + */ +public class TestDistributedFileSystemWithECFile { + private final ErasureCodingPolicy ecPolicy = + StripedFileTestUtil.getDefaultECPolicy(); + private final int cellSize = ecPolicy.getCellSize(); + private final short dataBlocks = (short) ecPolicy.getNumDataUnits(); + private final short parityBlocks = (short) ecPolicy.getNumParityUnits(); + private final int numDNs = dataBlocks + parityBlocks; + private final int stripesPerBlock = 4; + private final int blockSize = stripesPerBlock * cellSize; + private final int blockGroupSize = blockSize * dataBlocks; + + private MiniDFSCluster cluster; + private FileContext fileContext; + private DistributedFileSystem fs; + private Configuration conf = new HdfsConfiguration(); + + @Before + public void setup() throws IOException { + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); + conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, + false); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); + fileContext = FileContext.getFileContext(cluster.getURI(0), conf); + fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); + fs.mkdirs(new Path("/ec")); + cluster.getFileSystem().getClient().setErasureCodingPolicy("/ec", + StripedFileTestUtil.getDefaultECPolicy().getName()); + } + + @After + public void tearDown() throws IOException { + if (cluster != null) { + cluster.shutdown(); + cluster = null; + } + } + + private void createFile(String path, int size) throws Exception { + byte[] expected = StripedFileTestUtil.generateBytes(size); + Path src = new Path(path); + DFSTestUtil.writeFile(fs, src, new String(expected)); + StripedFileTestUtil.waitBlockGroupsReported(fs, src.toString()); + StripedFileTestUtil.verifyLength(fs, src, size); + } + + @Test(timeout=60000) + public void testListECFilesSmallerThanOneCell() throws Exception { + 
createFile("/ec/smallcell", 1); + final List retVal = new ArrayList<>(); + final RemoteIterator iter = + cluster.getFileSystem().listFiles(new Path("/ec"), true); + while (iter.hasNext()) { + retVal.add(iter.next()); + } + assertTrue(retVal.size() == 1); + LocatedFileStatus fileStatus = retVal.get(0); + assertSmallerThanOneCell(fileStatus.getBlockLocations()); + + BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations( + fileStatus, 0, fileStatus.getLen()); + assertSmallerThanOneCell(locations); + + //Test FileContext + fileStatus = fileContext.listLocatedStatus(new Path("/ec")).next(); + assertSmallerThanOneCell(fileStatus.getBlockLocations()); + locations = fileContext.getFileBlockLocations(new Path("/ec/smallcell"), + 0, fileStatus.getLen()); + assertSmallerThanOneCell(locations); + } + + private void assertSmallerThanOneCell(BlockLocation[] locations) + throws IOException { + assertTrue(locations.length == 1); + BlockLocation blockLocation = locations[0]; + assertTrue(blockLocation.getOffset() == 0); + assertTrue(blockLocation.getLength() == 1); + assertTrue(blockLocation.getHosts().length == 1 + parityBlocks); + } + + @Test(timeout=60000) + public void testListECFilesSmallerThanOneStripe() throws Exception { + int dataBlocksNum = 3; + createFile("/ec/smallstripe", cellSize * dataBlocksNum); + RemoteIterator iter = + cluster.getFileSystem().listFiles(new Path("/ec"), true); + LocatedFileStatus fileStatus = iter.next(); + assertSmallerThanOneStripe(fileStatus.getBlockLocations(), dataBlocksNum); + + BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations( + fileStatus, 0, fileStatus.getLen()); + assertSmallerThanOneStripe(locations, dataBlocksNum); + + //Test FileContext + fileStatus = fileContext.listLocatedStatus(new Path("/ec")).next(); + assertSmallerThanOneStripe(fileStatus.getBlockLocations(), dataBlocksNum); + locations = fileContext.getFileBlockLocations(new Path("/ec/smallstripe"), + 0, fileStatus.getLen()); + assertSmallerThanOneStripe(locations, dataBlocksNum); + } + + private void assertSmallerThanOneStripe(BlockLocation[] locations, + int dataBlocksNum) throws IOException { + int expectedHostNum = dataBlocksNum + parityBlocks; + assertTrue(locations.length == 1); + BlockLocation blockLocation = locations[0]; + assertTrue(blockLocation.getHosts().length == expectedHostNum); + assertTrue(blockLocation.getOffset() == 0); + assertTrue(blockLocation.getLength() == dataBlocksNum * cellSize); + } + + @Test(timeout=60000) + public void testListECFilesMoreThanOneBlockGroup() throws Exception { + createFile("/ec/group", blockGroupSize + 123); + RemoteIterator iter = + cluster.getFileSystem().listFiles(new Path("/ec"), true); + LocatedFileStatus fileStatus = iter.next(); + assertMoreThanOneBlockGroup(fileStatus.getBlockLocations(), 123); + + BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations( + fileStatus, 0, fileStatus.getLen()); + assertMoreThanOneBlockGroup(locations, 123); + + //Test FileContext + iter = fileContext.listLocatedStatus(new Path("/ec")); + fileStatus = iter.next(); + assertMoreThanOneBlockGroup(fileStatus.getBlockLocations(), 123); + locations = fileContext.getFileBlockLocations(new Path("/ec/group"), + 0, fileStatus.getLen()); + assertMoreThanOneBlockGroup(locations, 123); + } + + private void assertMoreThanOneBlockGroup(BlockLocation[] locations, + int lastBlockSize) throws IOException { + assertTrue(locations.length == 2); + BlockLocation fistBlockGroup = locations[0]; + 
assertTrue(fistBlockGroup.getHosts().length == numDNs); + assertTrue(fistBlockGroup.getOffset() == 0); + assertTrue(fistBlockGroup.getLength() == blockGroupSize); + BlockLocation lastBlock = locations[1]; + assertTrue(lastBlock.getHosts().length == 1 + parityBlocks); + assertTrue(lastBlock.getOffset() == blockGroupSize); + assertTrue(lastBlock.getLength() == lastBlockSize); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithKMS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithKMS.java index 6f533625cc7..959e724b58f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithKMS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithKMS.java @@ -21,7 +21,6 @@ import com.google.common.base.Supplier; import org.apache.hadoop.crypto.key.kms.KMSClientProvider; -import org.apache.hadoop.crypto.key.kms.LoadBalancingKMSClientProvider; import org.apache.hadoop.crypto.key.kms.server.MiniKMS; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; @@ -70,21 +69,14 @@ public void teardown() { protected void setProvider() { } - private KMSClientProvider getKMSClientProvider() { - LoadBalancingKMSClientProvider lbkmscp = - (LoadBalancingKMSClientProvider) Whitebox - .getInternalState(cluster.getNamesystem().getProvider(), "extension"); - assert lbkmscp.getProviders().length == 1; - return lbkmscp.getProviders()[0]; - } - @Test(timeout = 120000) public void testCreateEZPopulatesEDEKCache() throws Exception { final Path zonePath = new Path("/TestEncryptionZone"); fsWrapper.mkdir(zonePath, FsPermission.getDirDefault(), false); dfsAdmin.createEncryptionZone(zonePath, TEST_KEY, NO_TRASH); @SuppressWarnings("unchecked") - KMSClientProvider kcp = getKMSClientProvider(); + KMSClientProvider kcp = (KMSClientProvider) Whitebox + .getInternalState(cluster.getNamesystem().getProvider(), "extension"); assertTrue(kcp.getEncKeyQueueSize(TEST_KEY) > 0); } @@ -118,7 +110,8 @@ public void testWarmupEDEKCacheOnStartup() throws Exception { dfsAdmin.createEncryptionZone(zonePath, anotherKey, NO_TRASH); @SuppressWarnings("unchecked") - KMSClientProvider spy = getKMSClientProvider(); + KMSClientProvider spy = (KMSClientProvider) Whitebox + .getInternalState(cluster.getNamesystem().getProvider(), "extension"); assertTrue("key queue is empty after creating encryption zone", spy.getEncKeyQueueSize(TEST_KEY) > 0); @@ -129,7 +122,9 @@ public void testWarmupEDEKCacheOnStartup() throws Exception { GenericTestUtils.waitFor(new Supplier() { @Override public Boolean get() { - final KMSClientProvider kspy = getKMSClientProvider(); + final KMSClientProvider kspy = (KMSClientProvider) Whitebox + .getInternalState(cluster.getNamesystem().getProvider(), + "extension"); return kspy.getEncKeyQueueSize(TEST_KEY) > 0; } }, 1000, 60000); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodeBenchmarkThroughput.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodeBenchmarkThroughput.java index be962dc4cd8..da3407d2fc1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodeBenchmarkThroughput.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodeBenchmarkThroughput.java @@ -48,11 +48,11 @@ public static void setup() throws IOException { 
conf = new HdfsConfiguration(); int numDN = ErasureCodeBenchmarkThroughput.getEcPolicy().getNumDataUnits() + ErasureCodeBenchmarkThroughput.getEcPolicy().getNumParityUnits(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ErasureCodeBenchmarkThroughput.getEcPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDN).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + ((DistributedFileSystem)fs).enableErasureCodingPolicy( + ErasureCodeBenchmarkThroughput.getEcPolicy().getName()); } @AfterClass diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java index e095602c6ea..4f2040b60f3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java @@ -77,13 +77,13 @@ public void setupCluster() throws IOException { ecPolicy = getEcPolicy(); conf = new HdfsConfiguration(); conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); - DFSTestUtil.enableAllECPolicies(conf); cluster = new MiniDFSCluster.Builder(conf). numDataNodes(ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits()). build(); cluster.waitActive(); fs = cluster.getFileSystem(); namesystem = cluster.getNamesystem(); + DFSTestUtil.enableAllECPolicies(fs); } @After @@ -206,16 +206,9 @@ public void testBasicSetECPolicy() // Verify that policies are successfully loaded even when policies // are disabled - cluster.getConfiguration(0).set( - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, ""); cluster.restartNameNodes(); cluster.waitActive(); - // Only default policy should be enabled after restart - Assert.assertEquals("Only default policy should be enabled after restart", - 1, - ErasureCodingPolicyManager.getInstance().getEnabledPolicies().length); - // Already set directory-level policies should still be in effect Path disabledPolicy = new Path(dir1, "afterDisabled"); Assert.assertEquals("Dir does not have policy set", @@ -725,7 +718,7 @@ public void testAddErasureCodingPolicies() throws Exception { policyArray = new ErasureCodingPolicy[]{policy0}; responses = fs.addErasureCodingPolicies(policyArray); assertEquals(1, responses.length); - assertFalse(responses[0].isSucceed()); + assertTrue(responses[0].isSucceed()); // Test add policy successfully newPolicy = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicyWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicyWithSnapshot.java index fbeada67dc7..6ab018bbea8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicyWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicyWithSnapshot.java @@ -53,11 +53,10 @@ public void setupCluster() throws IOException { groupSize = (short) (ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits()); conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy(ecPolicy.getName()); } @After diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java index b804523b3c0..9d6687c6c46 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java @@ -77,8 +77,6 @@ public void setup() throws IOException { conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); Path ecPath = new Path(ecDir); cluster.getFileSystem().mkdir(ecPath, FsPermission.getDirDefault()); @@ -86,7 +84,8 @@ public void setup() throws IOException { StripedFileTestUtil.getDefaultECPolicy().getName()); fs = cluster.getFileSystem(); client = fs.getClient(); - + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); bytesPerCRC = conf.getInt( HdfsClientConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, HdfsClientConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileStatusWithECPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileStatusWithECPolicy.java index e04f9573256..077cf3a115f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileStatusWithECPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileStatusWithECPolicy.java @@ -45,13 +45,13 @@ public class TestFileStatusWithECPolicy { @Before public void before() throws IOException { HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); cluster.waitActive(); fs = cluster.getFileSystem(); client = fs.getClient(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); } @After diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecoveryStriped.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecoveryStriped.java index 86b1aadf6ea..2846dbf7f00 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecoveryStriped.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecoveryStriped.java @@ -88,12 +88,11 @@ public void setup() throws IOException { false); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); final int numDNs = dataBlocks + parityBlocks; cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); cluster.waitActive(); dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy(ecPolicy.getName()); dfs.mkdirs(dir); dfs.setErasureCodingPolicy(dir, ecPolicy.getName()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadStripedFileWithMissingBlocks.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadStripedFileWithMissingBlocks.java index 34cba92ad21..f3b8dd84f82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadStripedFileWithMissingBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadStripedFileWithMissingBlocks.java @@ -58,12 +58,11 @@ public class TestReadStripedFileWithMissingBlocks { public void setup() throws IOException { conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); cluster.getFileSystem().getClient().setErasureCodingPolicy( "/", ecPolicy.getName()); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy(ecPolicy.getName()); } public void tearDown() throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java index 7cd34c2acd7..72b14129484 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java @@ -105,12 +105,12 @@ public void setup() throws IOException { CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY, NativeRSRawErasureCoderFactory.CODER_NAME); } - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(dnNum).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); fs.getClient().setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); @@ -447,7 +447,7 @@ private void testNNSendsErasureCodingTasks(int deadDN) throws Exception { conf.setInt( DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, 10); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 20); - conf.setInt(DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_STRIPED_BLK_THREADS_KEY, + conf.setInt(DFSConfigKeys.DFS_DN_EC_RECONSTRUCTION_THREADS_KEY, 2); cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(numDataNodes).build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeModeWithStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeModeWithStripedFile.java index edecbf27a6a..3d3ec9c6c66 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeModeWithStripedFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeModeWithStripedFile.java @@ -61,12 +61,12 @@ public void setup() throws IOException { conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 100); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); cluster.getFileSystem().getClient().setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); 
cluster.waitActive(); + cluster.getFileSystem().enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); } @After diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSetrepIncreasing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSetrepIncreasing.java index 50d7b2756f9..497d450de25 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSetrepIncreasing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSetrepIncreasing.java @@ -110,13 +110,13 @@ public void testSetRepWithStoragePolicyOnEmptyFile() throws Exception { public void testSetRepOnECFile() throws Exception { ClientProtocol client; Configuration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1) .build(); cluster.waitActive(); client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0).getUri(), ClientProtocol.class).getProxy(); + client.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); client.setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestUnsetAndChangeDirectoryEcPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestUnsetAndChangeDirectoryEcPolicy.java index 5371e205ac3..529a110c0ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestUnsetAndChangeDirectoryEcPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestUnsetAndChangeDirectoryEcPolicy.java @@ -70,11 +70,11 @@ public void setup() throws IOException { CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY, NativeRSRawErasureCoderFactory.CODER_NAME); } - DFSTestUtil.enableAllECPolicies(conf); cluster = new MiniDFSCluster.Builder(conf).numDataNodes( dataBlocks + parityBlocks).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + DFSTestUtil.enableAllECPolicies(fs); } @After diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteReadStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteReadStripedFile.java index 9b14df14c34..f27c9786db6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteReadStripedFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestWriteReadStripedFile.java @@ -76,10 +76,10 @@ public void setup() throws IOException { conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); fs.mkdirs(new Path("/ec")); cluster.getFileSystem().getClient().setErasureCodingPolicy("/ec", StripedFileTestUtil.getDefaultECPolicy().getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java index 8555e5d0ad9..2fe0a1c2957 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java @@ -118,7 +118,7 @@ public void testClientAndServerDoNotHaveCommonQop() throws Exception { HdfsConfiguration clientConf = new HdfsConfiguration(clusterConf); clientConf.set(DFS_DATA_TRANSFER_PROTECTION_KEY, "authentication"); exception.expect(IOException.class); - exception.expectMessage("could only be replicated to 0 nodes"); + exception.expectMessage("could only be written to 0"); doTest(clientConf); } @@ -140,7 +140,7 @@ public void testServerSaslNoClientSasl() throws Exception { "configured or not supported in client"); } catch (IOException e) { GenericTestUtils.assertMatches(e.getMessage(), - "could only be replicated to 0 nodes"); + "could only be written to 0"); } finally { logs.stopCapturing(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java index 506497e6ae4..97cf2f3c068 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java @@ -23,7 +23,7 @@ import java.util.TreeMap; import java.util.concurrent.TimeoutException; -import org.apache.hadoop.hdfs.qjournal.client.QuorumCall; +import org.apache.hadoop.util.FakeTimer; import org.junit.Test; import com.google.common.base.Joiner; @@ -83,4 +83,33 @@ public void testQuorumFailsWithoutResponse() throws Exception { } } + @Test(timeout=10000) + public void testQuorumSucceedsWithLongPause() throws Exception { + final Map> futures = ImmutableMap.of( + "f1", SettableFuture.create()); + + FakeTimer timer = new FakeTimer() { + private int callCount = 0; + @Override + public long monotonicNowNanos() { + callCount++; + if (callCount == 1) { + long old = super.monotonicNowNanos(); + advance(1000000); + return old; + } else if (callCount == 10) { + futures.get("f1").set("first future"); + return super.monotonicNowNanos(); + } else { + return super.monotonicNowNanos(); + } + } + }; + + QuorumCall q = QuorumCall.create(futures, timer); + assertEquals(0, q.countResponses()); + + q.waitFor(1, 0, 0, 3000, "test"); // wait for 1 response + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java index 28ec7082537..77b50a178eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java @@ -55,6 +55,7 @@ import com.google.common.base.Charsets; import com.google.common.primitives.Bytes; import com.google.common.primitives.Ints; +import org.mockito.Mockito; public class TestJournalNode { @@ -342,4 +343,24 @@ private void doPerfTest(int editsSize, int numEdits) throws Exception { System.err.println("Time per batch: " + avgRtt + "ms"); System.err.println("Throughput: 
" + throughput + " bytes/sec"); } + + /** + * Test case to check if JournalNode exits cleanly when httpserver or rpc + * server fails to start. Call to JournalNode start should fail with bind + * exception as the port is in use by the JN started in @Before routine + */ + @Test + public void testJournalNodeStartupFailsCleanly() { + JournalNode jNode = Mockito.spy(new JournalNode()); + try { + jNode.setConf(conf); + jNode.start(); + fail("Should throw bind exception"); + } catch (Exception e) { + GenericTestUtils + .assertExceptionContains("java.net.BindException: Port in use", e); + } + Mockito.verify(jNode).stop(1); + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index ec9c39a622a..a900ad191da 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -1981,8 +1981,6 @@ private void doTestBalancerWithStripedFile(Configuration conf) throws Exception for (int i = 0; i < numOfDatanodes; i++) { racks[i] = "/rack" + (i % numOfRacks); } - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(numOfDatanodes) .racks(racks) @@ -1993,6 +1991,8 @@ private void doTestBalancerWithStripedFile(Configuration conf) throws Exception cluster.waitActive(); client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0).getUri(), ClientProtocol.class).getProxy(); + client.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); client.setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java index 4092e5ef33a..10289ed0e9c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java @@ -1030,8 +1030,7 @@ public void testStorageWithRemainingCapacity() throws Exception { 0x1BAD5EED); } catch (RemoteException re) { - GenericTestUtils.assertExceptionContains("nodes instead of " - + "minReplication", re); + GenericTestUtils.assertExceptionContains("of the 1 minReplication", re); } } finally { @@ -1368,8 +1367,6 @@ public void testPlacementPolicySatisfied() throws Exception { Configuration conf = new Configuration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); conf.setLong(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf) @@ -1382,6 +1379,8 @@ public void testPlacementPolicySatisfied() throws Exception { final Path ecDir = new Path("/ec"); final Path testFileUnsatisfied = new Path(ecDir, "test1"); final Path testFileSatisfied = new Path(ecDir, "test2"); + dfs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); 
cluster.getFileSystem().getClient().mkdirs(ecDir.toString(), null, true); cluster.getFileSystem().getClient() .setErasureCodingPolicy(ecDir.toString(), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java index 701928d2ff4..5b03d8e9799 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockStatsMXBean.java @@ -41,8 +41,10 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.junit.After; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; import org.eclipse.jetty.util.ajax.JSON; +import org.junit.rules.Timeout; /** * Class for testing {@link BlockStatsMXBean} implementation @@ -51,6 +53,9 @@ public class TestBlockStatsMXBean { private MiniDFSCluster cluster; + @Rule + public Timeout globalTimeout = new Timeout(300000); + @Before public void setup() throws IOException { HdfsConfiguration conf = new HdfsConfiguration(); @@ -181,7 +186,7 @@ public void testStorageTypeStatsWhenStorageFailed() throws Exception { fail("Should throw exception, becuase no DISK storage available"); } catch (Exception e) { assertTrue(e.getMessage().contains( - "could only be replicated to 0 nodes instead")); + "could only be written to 0 of the 1 minReplication")); } // wait for heartbeat Thread.sleep(6000); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFSStriped.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFSStriped.java index 54f28053f64..7627cf5c6a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFSStriped.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFSStriped.java @@ -55,8 +55,6 @@ public class TestBlockTokenWithDFSStriped extends TestBlockTokenWithDFS { private Configuration getConf() { Configuration conf = super.getConf(numDNs); conf.setInt("io.bytes.per.checksum", cellSize); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); return conf; } @@ -85,6 +83,8 @@ public void testRead() throws Exception { .nameNodeHttpPort(ServerSocketUtil.getPort(19870, 100)) .numDataNodes(numDNs) .build(); + cluster.getFileSystem().enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); cluster.getFileSystem().getClient().setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); try { @@ -116,8 +116,6 @@ public void testAppend() throws Exception { public void testEnd2End() throws Exception { Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, true); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); new TestBalancer().integrationTestWithStripedFile(conf); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java index 241391821fb..cf4299b5015 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestComputeInvalidateWork.java @@ -24,7 +24,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -70,8 +69,6 @@ public void setup() throws Exception { ecPolicy = SystemErasureCodingPolicies.getByID( SystemErasureCodingPolicies.XOR_2_1_POLICY_ID); conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_OF_DATANODES) .build(); cluster.waitActive(); @@ -84,6 +81,7 @@ public void setup() throws Exception { // Create a striped file Path ecDir = new Path("/ec"); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy(ecPolicy.getName()); fs.mkdirs(ecDir); fs.getClient().setErasureCodingPolicy(ecDir.toString(), ecPolicy.getName()); ecFile = new Path(ecDir, "ec-file"); @@ -268,9 +266,9 @@ public void testDatanodeReRegistration() throws Exception { "Striped BlockGroups!", (long) expected, invalidateBlocks.numBlocks()); assertEquals("Unexpected invalidate count for replicas!", - totalReplicas, invalidateBlocks.getBlocksStat()); + totalReplicas, invalidateBlocks.getBlocks()); assertEquals("Unexpected invalidate count for striped block groups!", - totalStripedDataBlocks, invalidateBlocks.getECBlockGroupsStat()); + totalStripedDataBlocks, invalidateBlocks.getECBlocks()); } finally { namesystem.writeUnlock(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java index 3f8a5cd4845..3510bc3d769 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java @@ -78,10 +78,10 @@ private void verifyCorruptBlocksCount(CorruptReplicasMap corruptReplicasMap, assertEquals("Unexpected total corrupt blocks count!", totalExpectedCorruptBlocks, corruptReplicasMap.size()); assertEquals("Unexpected replica blocks count!", - expectedReplicaCount, corruptReplicasMap.getCorruptBlocksStat()); + expectedReplicaCount, corruptReplicasMap.getCorruptBlocks()); assertEquals("Unexpected striped blocks count!", expectedStripedBlockCount, - corruptReplicasMap.getCorruptECBlockGroupsStat()); + corruptReplicasMap.getCorruptECBlockGroups()); } @Test @@ -93,9 +93,9 @@ public void testCorruptReplicaInfo() assertEquals("Total number of corrupt blocks must initially be 0!", 0, crm.size()); assertEquals("Number of corrupt replicas must initially be 0!", - 0, crm.getCorruptBlocksStat()); + 0, crm.getCorruptBlocks()); assertEquals("Number of corrupt striped block groups must initially be 0!", - 0, crm.getCorruptECBlockGroupsStat()); + 0, crm.getCorruptECBlockGroups()); assertNull("Param n cannot be less than 0", 
crm.getCorruptBlockIdsForTesting(BlockType.CONTIGUOUS, -1, null)); assertNull("Param n cannot be greater than 100", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java index c65fc6495f8..2b28f1ef3ce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java @@ -50,16 +50,16 @@ private void verifyBlockStats(LowRedundancyBlocks queues, int corruptReplicationOneCount, int lowRedundancyStripedCount, int corruptStripedCount) { assertEquals("Low redundancy replica count incorrect!", - lowRedundancyReplicaCount, queues.getLowRedundancyBlocksStat()); + lowRedundancyReplicaCount, queues.getLowRedundancyBlocks()); assertEquals("Corrupt replica count incorrect!", - corruptReplicaCount, queues.getCorruptBlocksStat()); + corruptReplicaCount, queues.getCorruptBlocks()); assertEquals("Corrupt replica one count incorrect!", corruptReplicationOneCount, - queues.getCorruptReplicationOneBlocksStat()); + queues.getCorruptReplicationOneBlocks()); assertEquals("Low redundancy striped blocks count incorrect!", - lowRedundancyStripedCount, queues.getLowRedundancyECBlockGroupsStat()); + lowRedundancyStripedCount, queues.getLowRedundancyECBlockGroups()); assertEquals("Corrupt striped blocks count incorrect!", - corruptStripedCount, queues.getCorruptECBlockGroupsStat()); + corruptStripedCount, queues.getCorruptECBlockGroups()); assertEquals("Low Redundancy count incorrect!", lowRedundancyReplicaCount + lowRedundancyStripedCount, queues.getLowRedundancyBlockCount()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReconstructStripedBlocksWithRackAwareness.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReconstructStripedBlocksWithRackAwareness.java index aaa48997eac..7d16017c0d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReconstructStripedBlocksWithRackAwareness.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReconstructStripedBlocksWithRackAwareness.java @@ -145,13 +145,12 @@ private DataNode getDataNode(String host) { public void testReconstructForNotEnoughRacks() throws Exception { LOG.info("cluster hosts: {}, racks: {}", Arrays.asList(hosts), Arrays.asList(racks)); - - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).racks(racks).hosts(hosts) .numDataNodes(hosts.length).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); fs.setErasureCodingPolicy(new Path("/"), StripedFileTestUtil.getDefaultECPolicy().getName()); FSNamesystem fsn = cluster.getNamesystem(); @@ -219,12 +218,12 @@ public void testReconstructForNotEnoughRacks() throws Exception { @Test public void testChooseExcessReplicasToDelete() throws Exception { - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new 
MiniDFSCluster.Builder(conf).racks(racks).hosts(hosts) .numDataNodes(hosts.length).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); fs.setErasureCodingPolicy(new Path("/"), StripedFileTestUtil.getDefaultECPolicy().getName()); @@ -271,8 +270,6 @@ public void testChooseExcessReplicasToDelete() throws Exception { */ @Test public void testReconstructionWithDecommission() throws Exception { - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); final String[] rackNames = getRacks(dataBlocks + parityBlocks + 2, dataBlocks); final String[] hostNames = getHosts(dataBlocks + parityBlocks + 2); @@ -281,6 +278,8 @@ public void testReconstructionWithDecommission() throws Exception { .numDataNodes(hostNames.length).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); fs.setErasureCodingPolicy(new Path("/"), StripedFileTestUtil.getDefaultECPolicy().getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockGroupId.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockGroupId.java index c5066a04a2a..241c2dcf991 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockGroupId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestSequentialBlockGroupId.java @@ -31,10 +31,10 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.StripedFileTestUtil; @@ -72,7 +72,7 @@ public class TestSequentialBlockGroupId { private final int fileLen = blockSize * dataBlocks * blockGrpCount; private MiniDFSCluster cluster; - private FileSystem fs; + private DistributedFileSystem fs; private SequentialBlockGroupIdGenerator blockGrpIdGenerator; private Path ecDir = new Path("/ecDir"); @@ -81,12 +81,12 @@ public void setup() throws Exception { Configuration conf = new HdfsConfiguration(); conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); blockGrpIdGenerator = cluster.getNamesystem().getBlockManager() .getBlockIdManager().getBlockGroupIdGenerator(); fs.mkdirs(ecDir); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java index ee2afbbd8b5..7194385090e 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java @@ -72,13 +72,13 @@ public void setup() throws IOException { conf = new Configuration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); cluster.waitActive(); cluster.getFileSystem().getClient().setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); } @After @@ -90,6 +90,10 @@ public void tearDown() { @Test(timeout = 120000) public void testFullBlock() throws Exception { + Assert.assertEquals(0, getLongMetric("EcReconstructionReadTimeMillis")); + Assert.assertEquals(0, getLongMetric("EcReconstructionDecodingTimeMillis")); + Assert.assertEquals(0, getLongMetric("EcReconstructionWriteTimeMillis")); + doTest("/testEcMetrics", blockGroupSize, 0); Assert.assertEquals("EcReconstructionTasks should be ", @@ -103,6 +107,9 @@ public void testFullBlock() throws Exception { blockSize, getLongMetric("EcReconstructionBytesWritten")); Assert.assertEquals("EcReconstructionRemoteBytesRead should be ", 0, getLongMetricWithoutCheck("EcReconstructionRemoteBytesRead")); + Assert.assertTrue(getLongMetric("EcReconstructionReadTimeMillis") > 0); + Assert.assertTrue(getLongMetric("EcReconstructionDecodingTimeMillis") > 0); + Assert.assertTrue(getLongMetric("EcReconstructionWriteTimeMillis") > 0); } // A partial block, reconstruct the partial block diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/command/TestDiskBalancerCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/command/TestDiskBalancerCommand.java index a08f071ec90..eca102ed192 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/command/TestDiskBalancerCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/diskbalancer/command/TestDiskBalancerCommand.java @@ -478,9 +478,12 @@ public void testPlanNode() throws Exception { public void testPlanJsonNode() throws Exception { final String planArg = String.format("-%s %s", PLAN, "a87654a9-54c7-4693-8dd9-c9c7021dc340"); + final Path testPath = new Path( + PathUtils.getTestPath(getClass()), + GenericTestUtils.getMethodName()); final String cmdLine = String .format( - "hdfs diskbalancer %s", planArg); + "hdfs diskbalancer -out %s %s", testPath, planArg); runCommand(cmdLine); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java index 707d46fd3cd..8ff660fb8ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java @@ -508,8 +508,6 @@ public void testMoverWithStripedFile() throws Exception { capacities[i][j]=capacity; } } - 
conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(numOfDatanodes) .storagesPerDatanode(storagesPerDatanode) @@ -529,6 +527,8 @@ public void testMoverWithStripedFile() throws Exception { try { cluster.waitActive(); + cluster.getFileSystem().enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); // set "/bar" directory with HOT storage policy. ClientProtocol client = NameNodeProxies.createProxy(conf, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddOverReplicatedStripedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddOverReplicatedStripedBlocks.java index ecbf99d8042..aad8e9b96a0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddOverReplicatedStripedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddOverReplicatedStripedBlocks.java @@ -76,12 +76,11 @@ public void setup() throws IOException { conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); SimulatedFSDataset.setFactory(conf); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); cluster.waitActive(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy(ecPolicy.getName()); fs.mkdirs(dirPath); fs.getClient().setErasureCodingPolicy(dirPath.toString(), ecPolicy.getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddStripedBlockInFBR.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddStripedBlockInFBR.java index a4f470b34d8..45e98ea30e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddStripedBlockInFBR.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddStripedBlockInFBR.java @@ -20,7 +20,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -62,11 +61,11 @@ public class TestAddStripedBlockInFBR { @Before public void setup() throws IOException { Configuration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize).build(); cluster.waitActive(); dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); } @After diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddStripedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddStripedBlocks.java index 623c444f714..ec13b448e23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddStripedBlocks.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddStripedBlocks.java @@ -19,7 +19,6 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSStripedOutputStream; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -85,11 +84,10 @@ public class TestAddStripedBlocks { @Before public void setup() throws IOException { HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize).build(); cluster.waitActive(); dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy(ecPolicy.getName()); dfs.getClient().setErasureCodingPolicy("/", ecPolicy.getName()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java index 74be90cfb5a..b6c13188c23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java @@ -175,8 +175,8 @@ public void testDeadNodeAsBlockTarget() throws Exception { // choose the targets, but local node should not get selected as this is not // part of the cluster anymore DatanodeStorageInfo[] results = bm.chooseTarget4NewBlock("/hello", 3, - clientNode, new HashSet(), 256 * 1024 * 1024L, null, (byte) 7, - BlockType.CONTIGUOUS, null); + clientNode, new HashSet<>(), 256 * 1024 * 1024L, null, (byte) 7, + BlockType.CONTIGUOUS, null, null); for (DatanodeStorageInfo datanodeStorageInfo : results) { assertFalse("Dead node should not be choosen", datanodeStorageInfo .getDatanodeDescriptor().equals(clientNode)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java index 133a18e72d5..a13574fbdb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java @@ -20,11 +20,13 @@ import java.io.FileNotFoundException; import java.util.AbstractMap; import java.util.ArrayList; +import java.util.Comparator; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -49,16 +51,21 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils; +import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.Node; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.Timeout; import 
org.mockito.Mockito; import org.mockito.internal.util.reflection.Whitebox; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY; /** * Test race between delete and other operations. For now only addBlock() @@ -71,6 +78,9 @@ public class TestDeleteRace { private static final Configuration conf = new HdfsConfiguration(); private MiniDFSCluster cluster; + @Rule + public Timeout timeout = new Timeout(60000 * 3); + @Test public void testDeleteAddBlockRace() throws Exception { testDeleteAddBlockRace(false); @@ -358,4 +368,78 @@ public void testDeleteAndCommitBlockSynchronizationRaceHasSnapshot() throws Exception { testDeleteAndCommitBlockSynchronizationRace(true); } + + + /** + * Test the sequence of deleting a file that has snapshot, + * and lease manager's hard limit recovery. + */ + @Test + public void testDeleteAndLeaseRecoveryHardLimitSnapshot() throws Exception { + final Path rootPath = new Path("/"); + final Configuration config = new Configuration(); + // Disable permissions so that another user can recover the lease. + config.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); + config.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + FSDataOutputStream stm = null; + try { + cluster = new MiniDFSCluster.Builder(config).numDataNodes(3).build(); + cluster.waitActive(); + + final DistributedFileSystem fs = cluster.getFileSystem(); + final Path testPath = new Path("/testfile"); + stm = fs.create(testPath); + LOG.info("test on " + testPath); + + // write a half block + AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2); + stm.hflush(); + + // create a snapshot, so delete does not release the file's inode. + SnapshotTestHelper.createSnapshot(fs, rootPath, "snap"); + + // delete the file without closing it. + fs.delete(testPath, false); + + // write enough bytes to trigger an addBlock, which would fail in + // the streamer. + AppendTestUtil.write(stm, 0, BLOCK_SIZE); + + // Mock a scenario that the lease reached hard limit. + final LeaseManager lm = (LeaseManager) Whitebox + .getInternalState(cluster.getNameNode().getNamesystem(), + "leaseManager"); + final TreeSet leases = + (TreeSet) Whitebox.getInternalState(lm, "sortedLeases"); + final TreeSet spyLeases = new TreeSet<>(new Comparator() { + @Override + public int compare(Lease o1, Lease o2) { + return Long.signum(o1.getLastUpdate() - o2.getLastUpdate()); + } + }); + while (!leases.isEmpty()) { + final Lease lease = leases.first(); + final Lease spyLease = Mockito.spy(lease); + Mockito.doReturn(true).when(spyLease).expiredHardLimit(); + spyLeases.add(spyLease); + leases.remove(lease); + } + Whitebox.setInternalState(lm, "sortedLeases", spyLeases); + + // wait for lease manager's background 'Monitor' class to check leases. 
+ Thread.sleep(2 * conf.getLong(DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY, + DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT)); + + LOG.info("Now check we can restart"); + cluster.restartNameNodes(); + LOG.info("Restart finished"); + } finally { + if (stm != null) { + IOUtils.closeStream(stm); + } + if (cluster != null) { + cluster.shutdown(); + } + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEnabledECPolicies.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEnabledECPolicies.java index d769f8bc6b7..63bfa27b4c9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEnabledECPolicies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEnabledECPolicies.java @@ -28,10 +28,8 @@ import org.junit.Test; import org.junit.rules.Timeout; -import java.util.Arrays; import java.util.HashSet; import java.util.Set; -import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -47,7 +45,7 @@ public class TestEnabledECPolicies { private void expectInvalidPolicy(String value) { HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, + conf.set(DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY, value); try { ErasureCodingPolicyManager.getInstance().init(conf); @@ -60,11 +58,10 @@ private void expectInvalidPolicy(String value) { private void expectValidPolicy(String value, final int numEnabled) throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - value); ErasureCodingPolicyManager manager = ErasureCodingPolicyManager.getInstance(); manager.init(conf); + manager.enablePolicy(value); assertEquals("Incorrect number of enabled policies", numEnabled, manager.getEnabledPolicies().length); } @@ -73,8 +70,8 @@ private void expectValidPolicy(String value, final int numEnabled) throws public void testDefaultPolicy() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); String defaultECPolicies = conf.get( - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_DEFAULT); + DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY, + DFSConfigKeys.DFS_NAMENODE_EC_SYSTEM_DEFAULT_POLICY_DEFAULT); expectValidPolicy(defaultECPolicies, 1); } @@ -97,11 +94,6 @@ public void testInvalid() throws Exception { public void testValid() throws Exception { String ecPolicyName = StripedFileTestUtil.getDefaultECPolicy().getName(); expectValidPolicy(ecPolicyName, 1); - expectValidPolicy(ecPolicyName + ", ", 1); - expectValidPolicy(",", 1); - expectValidPolicy(", " + ecPolicyName, 1); - expectValidPolicy(" ", 1); - expectValidPolicy(" , ", 1); } @Test @@ -128,13 +120,12 @@ public void testGetPolicies() throws Exception { private void testGetPolicies(ErasureCodingPolicy[] enabledPolicies) throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - Arrays.asList(enabledPolicies).stream() - .map(ErasureCodingPolicy::getName) - .collect(Collectors.joining(", "))); ErasureCodingPolicyManager manager = ErasureCodingPolicyManager.getInstance(); manager.init(conf); + for (ErasureCodingPolicy p : enabledPolicies) { + manager.enablePolicy(p.getName()); + } // Check that returned values are unique Set found = new 
HashSet<>(); for (ErasureCodingPolicy p : manager.getEnabledPolicies()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java index 4467dc1068e..ec80bff3e0b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java @@ -47,8 +47,10 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.StripedFileTestUtil; +import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoContiguous; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped; @@ -57,6 +59,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.EditLogValidation; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; import org.apache.log4j.Level; @@ -458,8 +461,6 @@ public void testFSEditLogOpCodes() throws IOException { public void testAddNewStripedBlock() throws IOException{ // start a cluster Configuration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - testECPolicy.getName()); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(9) @@ -467,6 +468,7 @@ public void testAddNewStripedBlock() throws IOException{ cluster.waitActive(); DistributedFileSystem fs = cluster.getFileSystem(); FSNamesystem fns = cluster.getNamesystem(); + fs.enableErasureCodingPolicy(testECPolicy.getName()); String testDir = "/ec"; String testFile = "testfile_001"; @@ -533,8 +535,6 @@ public void testAddNewStripedBlock() throws IOException{ public void testUpdateStripedBlocks() throws IOException{ // start a cluster Configuration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - testECPolicy.getName()); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(9) @@ -542,6 +542,7 @@ public void testUpdateStripedBlocks() throws IOException{ cluster.waitActive(); DistributedFileSystem fs = cluster.getFileSystem(); FSNamesystem fns = cluster.getNamesystem(); + fs.enableErasureCodingPolicy(testECPolicy.getName()); String testDir = "/ec"; String testFile = "testfile_002"; @@ -714,4 +715,84 @@ public void testHasNonEcBlockUsingStripedIDForUpdateBlocks() } } } + + @Test + public void testErasureCodingPolicyOperations() throws IOException { + // start a cluster + Configuration conf = new HdfsConfiguration(); + final int blockSize = 16 * 1024; + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(9) + .build(); + cluster.waitActive(); + DistributedFileSystem fs = cluster.getFileSystem(); + + // 1. 
add new policy + ECSchema schema = new ECSchema("rs", 5, 3); + int cellSize = 2 * 1024; + ErasureCodingPolicy newPolicy = + new ErasureCodingPolicy(schema, cellSize, (byte) 0); + ErasureCodingPolicy[] policyArray = new ErasureCodingPolicy[]{newPolicy}; + AddECPolicyResponse[] responses = + fs.addErasureCodingPolicies(policyArray); + assertEquals(1, responses.length); + assertTrue(responses[0].isSucceed()); + newPolicy = responses[0].getPolicy(); + + // Restart NameNode without saving namespace + cluster.restartNameNodes(); + cluster.waitActive(); + + // check if new policy is reapplied through edit log + ErasureCodingPolicy ecPolicy = + ErasureCodingPolicyManager.getInstance().getByID(newPolicy.getId()); + assertEquals(ErasureCodingPolicyState.DISABLED, ecPolicy.getState()); + + // 2. enable policy + fs.enableErasureCodingPolicy(newPolicy.getName()); + cluster.restartNameNodes(); + cluster.waitActive(); + ecPolicy = + ErasureCodingPolicyManager.getInstance().getByID(newPolicy.getId()); + assertEquals(ErasureCodingPolicyState.ENABLED, ecPolicy.getState()); + + // create a new file, use the policy + final Path dirPath = new Path("/striped"); + final Path filePath = new Path(dirPath, "file"); + final int fileLength = blockSize * newPolicy.getNumDataUnits(); + fs.mkdirs(dirPath); + fs.setErasureCodingPolicy(dirPath, newPolicy.getName()); + final byte[] bytes = StripedFileTestUtil.generateBytes(fileLength); + DFSTestUtil.writeFile(fs, filePath, bytes); + + // 3. disable policy + fs.disableErasureCodingPolicy(newPolicy.getName()); + cluster.restartNameNodes(); + cluster.waitActive(); + ecPolicy = + ErasureCodingPolicyManager.getInstance().getByID(newPolicy.getId()); + assertEquals(ErasureCodingPolicyState.DISABLED, ecPolicy.getState()); + // read file + DFSTestUtil.readFileAsBytes(fs, filePath); + + // 4. 
remove policy + fs.removeErasureCodingPolicy(newPolicy.getName()); + cluster.restartNameNodes(); + cluster.waitActive(); + ecPolicy = + ErasureCodingPolicyManager.getInstance().getByID(newPolicy.getId()); + assertEquals(ErasureCodingPolicyState.REMOVED, ecPolicy.getState()); + // read file + DFSTestUtil.readFileAsBytes(fs, filePath); + + cluster.shutdown(); + cluster = null; + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java index 9256056b4e1..c9d3255a310 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -34,7 +35,9 @@ import java.util.EnumSet; import org.apache.hadoop.hdfs.StripedFileTestUtil; +import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse; import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState; import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; @@ -43,6 +46,8 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped; import org.apache.hadoop.hdfs.protocol.BlockType; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.ipc.RemoteException; import org.junit.Assert; import org.apache.hadoop.fs.permission.PermissionStatus; @@ -241,11 +246,11 @@ private void testSaveAndLoadStripedINodeFile(FSNamesystem fsn, Configuration con @Test public void testSaveAndLoadStripedINodeFile() throws IOException{ Configuration conf = new Configuration(); - DFSTestUtil.enableAllECPolicies(conf); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).build(); cluster.waitActive(); + DFSTestUtil.enableAllECPolicies(cluster.getFileSystem()); testSaveAndLoadStripedINodeFile(cluster.getNamesystem(), conf, false); } finally { if (cluster != null) { @@ -262,11 +267,11 @@ public void testSaveAndLoadStripedINodeFile() throws IOException{ public void testSaveAndLoadStripedINodeFileUC() throws IOException { // construct a INode with StripedBlock for saving and loading Configuration conf = new Configuration(); - DFSTestUtil.enableAllECPolicies(conf); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).build(); cluster.waitActive(); + DFSTestUtil.enableAllECPolicies(cluster.getFileSystem()); testSaveAndLoadStripedINodeFile(cluster.getNamesystem(), conf, true); } finally { if (cluster != null) { @@ -462,13 +467,13 @@ public void testSupportBlockGroup() throws Exception { final int BLOCK_SIZE = 8 * 1024 * 1024; Configuration conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); - DFSTestUtil.enableAllECPolicies(conf); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(GROUP_SIZE) .build(); 
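The new TestFSEditLogLoader#testErasureCodingPolicyOperations above and the TestFSImage additions below exercise the same idea: a user-defined erasure coding policy is walked through add, enable, disable, and remove, and each state must survive a NameNode restart, either replayed from the edit log or loaded from a saved fsimage. The sketch below is illustrative only, not part of the patch; the class name is hypothetical, and the RS(5,3) schema and 2 KB cell size simply mirror the values used in the tests.

// Illustrative sketch only: the policy lifecycle and the persist-and-restart
// step that the new edit-log and fsimage tests verify.
import java.io.IOException;

import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.io.erasurecode.ECSchema;

public final class EcPolicyLifecycleSketch {

  /** Register a user-defined policy and enable it (DISABLED -> ENABLED). */
  static ErasureCodingPolicy addAndEnable(DistributedFileSystem fs)
      throws IOException {
    ECSchema schema = new ECSchema("rs", 5, 3);              // data=5, parity=3
    ErasureCodingPolicy candidate =
        new ErasureCodingPolicy(schema, 2 * 1024, (byte) 0); // 2 KB cells
    AddECPolicyResponse[] responses =
        fs.addErasureCodingPolicies(new ErasureCodingPolicy[] {candidate});
    ErasureCodingPolicy added = responses[0].getPolicy();
    fs.enableErasureCodingPolicy(added.getName());
    return added;
  }

  /** Disable and then remove a user-defined policy (system policies cannot be removed). */
  static void retire(DistributedFileSystem fs, ErasureCodingPolicy policy)
      throws IOException {
    fs.disableErasureCodingPolicy(policy.getName());
    fs.removeErasureCodingPolicy(policy.getName());
  }

  /** Persist the namespace to an fsimage and restart, as the new tests do. */
  static void saveNamespaceAndRestart(MiniDFSCluster cluster)
      throws IOException {
    DistributedFileSystem fs = cluster.getFileSystem();
    fs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER);
    fs.saveNamespace();
    fs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE);
    cluster.restartNameNodes();
    cluster.waitActive();
  }
}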
cluster.waitActive(); DistributedFileSystem fs = cluster.getFileSystem(); + DFSTestUtil.enableAllECPolicies(fs); Path parentDir = new Path("/ec-10-4"); Path childDir = new Path(parentDir, "ec-3-2"); ErasureCodingPolicy ec32Policy = SystemErasureCodingPolicies @@ -732,13 +737,13 @@ public void testBlockTypeProtoDefaultsToContiguous() throws Exception { public void testSaveAndLoadFileUnderReplicationPolicyDir() throws IOException { Configuration conf = new Configuration(); - DFSTestUtil.enableAllECPolicies(conf); MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf).build(); cluster.waitActive(); FSNamesystem fsn = cluster.getNamesystem(); DistributedFileSystem fs = cluster.getFileSystem(); + DFSTestUtil.enableAllECPolicies(fs); ErasureCodingPolicy replicaPolicy = SystemErasureCodingPolicies.getReplicationPolicy(); ErasureCodingPolicy defaultEcPolicy = @@ -810,4 +815,150 @@ public void testSaveAndLoadFileUnderReplicationPolicyDir() } } } + + /** + * Test persist and load erasure coding policies. + */ + @Test + public void testSaveAndLoadErasureCodingPolicies() throws IOException{ + Configuration conf = new Configuration(); + final int blockSize = 16 * 1024 * 1024; + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); + try (MiniDFSCluster cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(10).build()) { + cluster.waitActive(); + DistributedFileSystem fs = cluster.getFileSystem(); + DFSTestUtil.enableAllECPolicies(fs); + + // Save namespace and restart NameNode + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + cluster.restartNameNodes(); + cluster.waitActive(); + + assertEquals("Erasure coding policy number should match", + SystemErasureCodingPolicies.getPolicies().size(), + ErasureCodingPolicyManager.getInstance().getPolicies().length); + + // Add new erasure coding policy + ECSchema newSchema = new ECSchema("rs", 5, 4); + ErasureCodingPolicy newPolicy = + new ErasureCodingPolicy(newSchema, 2 * 1024, (byte) 254); + ErasureCodingPolicy[] policies = new ErasureCodingPolicy[]{newPolicy}; + AddECPolicyResponse[] ret = fs.addErasureCodingPolicies(policies); + assertEquals(1, ret.length); + assertEquals(true, ret[0].isSucceed()); + newPolicy = ret[0].getPolicy(); + + // Save namespace and restart NameNode + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + cluster.restartNameNodes(); + cluster.waitActive(); + + assertEquals("Erasure coding policy number should match", + SystemErasureCodingPolicies.getPolicies().size() + 1, + ErasureCodingPolicyManager.getInstance().getPolicies().length); + ErasureCodingPolicy ecPolicy = + ErasureCodingPolicyManager.getInstance().getByID(newPolicy.getId()); + assertEquals("Newly added erasure coding policy is not found", + newPolicy, ecPolicy); + assertEquals( + "Newly added erasure coding policy should be of disabled state", + ErasureCodingPolicyState.DISABLED, ecPolicy.getState()); + + // Test enable/disable/remove user customized erasure coding policy + testChangeErasureCodingPolicyState(cluster, blockSize, newPolicy); + // Test enable/disable built-in erasure coding policy + testChangeErasureCodingPolicyState(cluster, blockSize, + SystemErasureCodingPolicies.getByID((byte) 1)); + } + } + + + private void testChangeErasureCodingPolicyState(MiniDFSCluster cluster, + int blockSize, ErasureCodingPolicy targetPolicy) throws IOException { + DistributedFileSystem fs = 
cluster.getFileSystem(); + + // 1. Enable an erasure coding policy + fs.enableErasureCodingPolicy(targetPolicy.getName()); + targetPolicy.setState(ErasureCodingPolicyState.ENABLED); + // Create file, using the new policy + final Path dirPath = new Path("/striped"); + final Path filePath = new Path(dirPath, "file"); + final int fileLength = blockSize * targetPolicy.getNumDataUnits(); + fs.mkdirs(dirPath); + fs.setErasureCodingPolicy(dirPath, targetPolicy.getName()); + final byte[] bytes = StripedFileTestUtil.generateBytes(fileLength); + DFSTestUtil.writeFile(fs, filePath, bytes); + + + // Save namespace and restart NameNode + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + cluster.restartNameNodes(); + cluster.waitActive(); + ErasureCodingPolicy ecPolicy = + ErasureCodingPolicyManager.getInstance().getByID(targetPolicy.getId()); + assertEquals("The erasure coding policy is not found", + targetPolicy, ecPolicy); + assertEquals("The erasure coding policy should be of enabled state", + ErasureCodingPolicyState.ENABLED, ecPolicy.getState()); + // Read file regardless of the erasure coding policy state + DFSTestUtil.readFileAsBytes(fs, filePath); + + // 2. Disable an erasure coding policy + fs.disableErasureCodingPolicy(ecPolicy.getName()); + targetPolicy.setState(ErasureCodingPolicyState.DISABLED); + // Save namespace and restart NameNode + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + cluster.restartNameNodes(); + cluster.waitActive(); + ecPolicy = + ErasureCodingPolicyManager.getInstance().getByID(targetPolicy.getId()); + assertEquals("The erasure coding policy is not found", + targetPolicy, ecPolicy); + assertEquals("The erasure coding policy should be of disabled state", + ErasureCodingPolicyState.DISABLED, ecPolicy.getState()); + // Read file regardless of the erasure coding policy state + DFSTestUtil.readFileAsBytes(fs, filePath); + + // 3. 
Remove an erasure coding policy + try { + fs.removeErasureCodingPolicy(ecPolicy.getName()); + } catch (RemoteException e) { + // built-in policy cannot been removed + assertTrue("Built-in policy cannot be removed", + ecPolicy.isSystemPolicy()); + assertExceptionContains("System erasure coding policy", e); + return; + } + + targetPolicy.setState(ErasureCodingPolicyState.REMOVED); + // Save namespace and restart NameNode + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + cluster.restartNameNodes(); + cluster.waitActive(); + ecPolicy = ErasureCodingPolicyManager.getInstance().getByID( + targetPolicy.getId()); + assertEquals("The erasure coding policy saved into and loaded from " + + "fsImage is bad", targetPolicy, ecPolicy); + assertEquals("The erasure coding policy should be of removed state", + ErasureCodingPolicyState.REMOVED, ecPolicy.getState()); + // Read file regardless of the erasure coding policy state + DFSTestUtil.readFileAsBytes(fs, filePath); + fs.delete(dirPath, true); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index 7cdbde21d0b..558e3377708 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -685,13 +685,12 @@ public void testFsckOpenECFiles() throws Exception { final int numAllUnits = dataBlocks + ecPolicy.getNumParityUnits(); int blockSize = 2 * cellSize; conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes( numAllUnits + 1).build(); String topDir = "/myDir"; cluster.waitActive(); DistributedFileSystem fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy(ecPolicy.getName()); util.createFiles(fs, topDir); // set topDir to EC when it has replicated files cluster.getFileSystem().getClient().setErasureCodingPolicy( @@ -1999,19 +1998,19 @@ public Boolean get() { @Test public void testECFsck() throws Exception { - FileSystem fs = null; + DistributedFileSystem fs = null; final long precision = 1L; conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, precision); conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); int dataBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumDataUnits(); int parityBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumParityUnits(); int totalSize = dataBlocks + parityBlocks; cluster = new MiniDFSCluster.Builder(conf).numDataNodes(totalSize).build(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); // create a contiguous file Path replDirPath = new Path("/replicated"); @@ -2301,11 +2300,11 @@ public void testFsckCorruptECFile() throws Exception { StripedFileTestUtil.getDefaultECPolicy().getNumParityUnits(); int cellSize = StripedFileTestUtil.getDefaultECPolicy().getCellSize(); int totalSize = dataBlocks + parityBlocks; - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new 
MiniDFSCluster.Builder(conf) .numDataNodes(totalSize).build(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); Map dnIndices = new HashMap<>(); ArrayList dnList = cluster.getDataNodes(); for (int i = 0; i < totalSize; i++) { @@ -2372,11 +2371,11 @@ public void testFsckMissingECFile() throws Exception { StripedFileTestUtil.getDefaultECPolicy().getNumParityUnits(); int cellSize = StripedFileTestUtil.getDefaultECPolicy().getCellSize(); int totalSize = dataBlocks + parityBlocks; - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(totalSize).build(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); // create file Path ecDirPath = new Path("/striped"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java index 937bb61c7ec..36638e00195 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java @@ -726,8 +726,6 @@ public void testVerifyMissingBlockGroupsMetrics() throws Exception { DistributedFileSystem fs = null; try { Configuration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); int dataBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumDataUnits(); int parityBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumParityUnits(); @@ -736,6 +734,8 @@ public void testVerifyMissingBlockGroupsMetrics() throws Exception { cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(totalSize).build(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); // create file Path ecDirPath = new Path("/striped"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java index d217813bd5f..42ff6989e1a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNamenodeRetryCache.java @@ -436,7 +436,7 @@ public void testRetryCacheRebuild() throws Exception { LightWeightCache cacheSet = (LightWeightCache) namesystem.getRetryCache().getCacheSet(); - assertEquals("Retry cache size is wrong", 26, cacheSet.size()); + assertEquals("Retry cache size is wrong", 34, cacheSet.size()); Map oldEntries = new HashMap(); @@ -455,7 +455,7 @@ public void testRetryCacheRebuild() throws Exception { assertTrue(namesystem.hasRetryCache()); cacheSet = (LightWeightCache) namesystem .getRetryCache().getCacheSet(); - assertEquals("Retry cache size is wrong", 26, cacheSet.size()); + assertEquals("Retry cache size is wrong", 34, cacheSet.size()); iter = cacheSet.iterator(); while (iter.hasNext()) { CacheEntry entry = iter.next(); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestQuotaWithStripedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestQuotaWithStripedBlocks.java index f97492b7e04..9995393e675 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestQuotaWithStripedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestQuotaWithStripedBlocks.java @@ -65,13 +65,12 @@ public class TestQuotaWithStripedBlocks { public void setUp() throws IOException { final Configuration conf = new Configuration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize).build(); cluster.waitActive(); dir = cluster.getNamesystem().getFSDirectory(); dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy(ecPolicy.getName()); dfs.mkdirs(ecDir); dfs.getClient() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReconstructStripedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReconstructStripedBlocks.java index 02075f045d0..46907fd64e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReconstructStripedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReconstructStripedBlocks.java @@ -110,13 +110,12 @@ private void doTestMissingStripedBlock(int numOfMissed, int numOfBusy) throws Exception { Configuration conf = new HdfsConfiguration(); initConf(conf); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 1) .build(); - try { cluster.waitActive(); + cluster.getFileSystem().enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); final int numBlocks = 4; DFSTestUtil.createStripedFile(cluster, filePath, dirPath, numBlocks, 1, true); @@ -203,14 +202,14 @@ public void test2RecoveryTasksForSameBlockGroup() throws Exception { conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1000); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2) .build(); try { cluster.waitActive(); DistributedFileSystem fs = cluster.getFileSystem(); BlockManager bm = cluster.getNamesystem().getBlockManager(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); fs.getClient().setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); int fileLen = dataBlocks * blockSize; @@ -280,13 +279,12 @@ public void testCountLiveReplicas() throws Exception { conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2) .build(); cluster.waitActive(); DistributedFileSystem fs = cluster.getFileSystem(); - + 
fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); try { fs.mkdirs(dirPath); fs.setErasureCodingPolicy(dirPath, @@ -383,8 +381,6 @@ public void testReconstructionWork() throws Exception { ErasureCodingPolicy policy = SystemErasureCodingPolicies.getByID( SystemErasureCodingPolicies.XOR_2_1_POLICY_ID); - conf.setStrings(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - policy.getName()); Path ecDir = new Path("/ec"); Path ecFilePath = new Path(ecDir, "ec-file"); int blockGroups = 2; @@ -396,6 +392,7 @@ public void testReconstructionWork() throws Exception { try { // create an EC file with 2 block groups final DistributedFileSystem fs = dfsCluster.getFileSystem(); + fs.enableErasureCodingPolicy(policy.getName()); fs.mkdirs(ecDir); fs.setErasureCodingPolicy(ecDir, policy.getName()); DFSTestUtil.createStripedFile(dfsCluster, ecFilePath, ecDir, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java index 5612d6597fe..33c52bf81b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java @@ -91,7 +91,7 @@ public class TestReencryption { private FileSystemTestHelper fsHelper; private MiniDFSCluster cluster; - private HdfsAdmin dfsAdmin; + protected HdfsAdmin dfsAdmin; private DistributedFileSystem fs; private FSNamesystem fsn; private File testRootDir; @@ -199,8 +199,7 @@ public void testReencryptionBasic() throws Exception { verifyZoneStatus(zone, null, 0); // test re-encrypt after keyroll - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(2); FileEncryptionInfo fei1 = getFileEncryptionInfo(encFile1); @@ -316,8 +315,7 @@ public void testReencryptOrdering() throws Exception { final Path notReencrypted = new Path(zone, "f0"); final FileEncryptionInfo fei = getFileEncryptionInfo(lastReencryptedFile); final FileEncryptionInfo feiLast = getFileEncryptionInfo(notReencrypted); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // mark pause after first checkpoint (5 files) getEzManager().pauseForTestingAfterNthSubmission(1); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -363,8 +361,7 @@ public void testZoneDeleteDuringReencrypt() throws Exception { 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // test zone deleted during re-encrypt's checkpointing getEzManager().pauseForTestingAfterNthSubmission(1); getEzManager().resetMetricsForTesting(); @@ -409,8 +406,7 @@ public void testRestartAfterReencrypt() throws Exception { final Path encFile9 = new Path(zone, "9"); final FileEncryptionInfo fei0 = getFileEncryptionInfo(encFile0); final FileEncryptionInfo fei9 = getFileEncryptionInfo(encFile9); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(1); @@ -443,8 +439,7 @@ public void testRestartWithRenames() throws Exception { fsWrapper.rename(new Path(zone, "f"), new Path(zone, "f1")); // re-encrypt - fsn.getProvider().rollNewVersion(TEST_KEY); - 
fsn.getProvider().flush(); + rollKey(TEST_KEY); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(1); @@ -495,8 +490,7 @@ public void testRestartDuringReencrypt() throws Exception { final Path encFile9 = new Path(subdir, "9"); final FileEncryptionInfo fei0 = getFileEncryptionInfo(encFile0); final FileEncryptionInfo fei9 = getFileEncryptionInfo(encFile9); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // mark pause after first checkpoint (5 files) getEzManager().pauseForTestingAfterNthSubmission(1); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -540,8 +534,7 @@ public void testRestartAfterReencryptAndCheckpoint() throws Exception { final Path encFile9 = new Path(zone, "9"); final FileEncryptionInfo fei0 = getFileEncryptionInfo(encFile0); final FileEncryptionInfo fei9 = getFileEncryptionInfo(encFile9); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(1); @@ -585,8 +578,7 @@ public void testReencryptLoadedFromEdits() throws Exception { final Path encFile9 = new Path(zone, "9"); final FileEncryptionInfo fei0 = getFileEncryptionInfo(encFile0); final FileEncryptionInfo fei9 = getFileEncryptionInfo(encFile9); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // disable re-encrypt for testing, and issue a command getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -645,8 +637,7 @@ public void testReencryptLoadedFromFsimage() throws Exception { final Path encFile9 = new Path(zone, "9"); final FileEncryptionInfo fei0 = getFileEncryptionInfo(encFile0); final FileEncryptionInfo fei9 = getFileEncryptionInfo(encFile9); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // disable re-encrypt for testing, and issue a command getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -770,8 +761,7 @@ public void testReencryptNestedZones() throws Exception { 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Disable re-encrypt, send re-encrypt on '/', verify queue getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zoneRoot, ReencryptAction.START); @@ -816,8 +806,7 @@ public void testRaceCreateHandler() throws Exception { .createFile(fs, new Path(zone, "file" + i), len, (short) 1, 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -883,8 +872,7 @@ public void testRaceDeleteHandler() throws Exception { .createFile(fs, new Path(subdir, "file" + i), len, (short) 1, 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -930,8 +918,7 @@ public void testRaceDeleteUpdater() throws Exception { .createFile(fs, new Path(subdir, "file" + i), len, (short) 1, 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and 
pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -984,8 +971,7 @@ public void testRaceDeleteCurrentDirHandler() throws Exception { 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1029,8 +1015,7 @@ public void testRaceDeleteCurrentDirUpdater() throws Exception { 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1071,8 +1056,7 @@ public void testRaceDeleteZoneHandler() throws Exception { .createFile(fs, new Path(zone, "file" + i), len, (short) 1, 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1122,8 +1106,7 @@ public void testRaceDeleteCreateHandler() throws Exception { .createFile(fs, new Path(zone, "file" + i), len, (short) 1, 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1162,8 +1145,7 @@ public void testRaceDeleteCreateUpdater() throws Exception { .createFile(fs, new Path(zone, "file" + i), len, (short) 1, 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1220,8 +1202,7 @@ public void testReencryptRaceRename() throws Exception { .createFile(fs, new Path(subdir, "file" + i), len, (short) 1, 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // Issue the command re-encrypt and pause it getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1283,8 +1264,7 @@ public void testReencryptSnapshots() throws Exception { // test re-encrypt on snapshot dir final Path encFile1 = new Path(zone, "0"); final FileEncryptionInfo fei0 = getFileEncryptionInfo(encFile1); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); try { dfsAdmin.reencryptEncryptionZone(zoneSnap, ReencryptAction.START); fail("Reencrypt command on snapshot path should fail."); @@ -1423,8 +1403,7 @@ public void testReencryptCancel() throws Exception { fsWrapper.mkdir(subdir, FsPermission.getDirDefault(), true); DFSTestUtil.createFile(fs, new Path(subdir, "f"), len, (short) 1, 0xFEED); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // disable, test basic getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1442,8 +1421,7 @@ public void testReencryptCancel() throws Exception { assertExceptionContains("not under re-encryption", expected); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // test cancelling half-way 
getEzManager().pauseForTestingAfterNthSubmission(1); getEzManager().resumeReencryptForTesting(); @@ -1537,8 +1515,7 @@ public void reencryptEncryptedKeys() throws IOException { // re-encrypt 10 files, so 2 callables. Hang 1, pause the updater so the // callable is taken from the executor but not processed. - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); getEzManager().pauseReencryptForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForQueuedZones(1); @@ -1593,8 +1570,7 @@ public void testReencryptCancelForUpdater() throws Exception { fsWrapper.mkdir(subdir, FsPermission.getDirDefault(), true); DFSTestUtil.createFile(fs, new Path(subdir, "f"), len, (short) 1, 0xFEED); - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // disable, test basic getEzManager().pauseReencryptUpdaterForTesting(); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1625,8 +1601,7 @@ public void testReencryptionWithoutProvider() throws Exception { } // re-encrypt the zone - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(1); @@ -1678,8 +1653,7 @@ public void testReencryptionNNSafeMode() throws Exception { 0xFEED); } - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); + rollKey(TEST_KEY); // mark pause after first checkpoint (5 files) getEzManager().pauseForTestingAfterNthSubmission(1); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); @@ -1736,9 +1710,7 @@ public void reencryptEncryptedKeys() throws IOException { } // re-encrypt the zone - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); - + rollKey(TEST_KEY); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(1); assertEquals(0, injector.exceptionCount); @@ -1790,9 +1762,7 @@ public void reencryptUpdaterProcessOneTask() throws IOException { } // re-encrypt the zone - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); - + rollKey(TEST_KEY); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(1); assertEquals(0, injector.exceptionCount); @@ -1845,9 +1815,7 @@ public void reencryptUpdaterProcessCheckpoint() throws IOException { } // re-encrypt the zone - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); - + rollKey(TEST_KEY); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(1); assertEquals(0, injector.exceptionCount); @@ -1899,9 +1867,7 @@ public void reencryptUpdaterProcessOneTask() throws IOException { } // re-encrypt the zone - fsn.getProvider().rollNewVersion(TEST_KEY); - fsn.getProvider().flush(); - + rollKey(TEST_KEY); Whitebox.setInternalState(getUpdater(), "faultRetryInterval", 50); dfsAdmin.reencryptEncryptionZone(zone, ReencryptAction.START); waitForReencryptedZones(1); @@ -1929,4 +1895,11 @@ private ReencryptionUpdater getUpdater() { return (ReencryptionUpdater) Whitebox .getInternalState(getHandler(), "reencryptionUpdater"); } + + protected void rollKey(final String keyName) throws Exception { + dfsAdmin.getKeyProvider().rollNewVersion(keyName); + // need to flush for jceks provider to make the key version it returned + // after NN restart consistent. 
+ dfsAdmin.getKeyProvider().flush(); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryptionWithKMS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryptionWithKMS.java index af9c381ac86..642d5e53707 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryptionWithKMS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryptionWithKMS.java @@ -88,4 +88,9 @@ public void testReencryptionKMSACLs() throws Exception { KMSWebApp.getACLs().run(); testReencryptionBasic(); } + + @Override + protected void rollKey(final String keyName) throws Exception { + dfsAdmin.getKeyProvider().rollNewVersion(keyName); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java index 94172bbe696..d5f548736f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java @@ -577,7 +577,6 @@ public void testCorruptImageFallbackLostECPolicy() throws IOException { .getDefaultECPolicy(); final String policy = defaultPolicy.getName(); final Path f1 = new Path("/f1"); - config.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, policy); MiniDFSCluster cluster = new MiniDFSCluster.Builder(config) .numDataNodes(0) @@ -586,6 +585,7 @@ public void testCorruptImageFallbackLostECPolicy() throws IOException { try { cluster.waitActive(); DistributedFileSystem fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy(policy); // set root directory to use the default ec policy Path srcECDir = new Path("/"); fs.setErasureCodingPolicy(srcECDir, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStripedINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStripedINodeFile.java index c71d049243b..468e47fd18a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStripedINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStripedINodeFile.java @@ -318,8 +318,6 @@ public void testDeleteOp() throws Exception { final short GROUP_SIZE = (short) (testECPolicy.getNumDataUnits() + testECPolicy.getNumParityUnits()); conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_KEY, 2); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(GROUP_SIZE) .build(); @@ -327,6 +325,8 @@ public void testDeleteOp() throws Exception { FSNamesystem fsn = cluster.getNamesystem(); dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); dfs.mkdirs(ecDir); // set erasure coding policy @@ -395,8 +395,6 @@ public void testUnsuitableStoragePoliciesWithECStripedMode() 1L); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); // start 10 datanodes int numOfDatanodes = 
10; @@ -426,6 +424,8 @@ public void testUnsuitableStoragePoliciesWithECStripedMode() try { cluster.waitActive(); + cluster.getFileSystem().enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); // set "/foo" directory with ONE_SSD storage policy. ClientProtocol client = NameNodeProxies.createProxy(conf, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java index b40006be732..1d114d62e4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java @@ -166,7 +166,7 @@ public void testRetryCacheOnStandbyNN() throws Exception { FSNamesystem fsn0 = cluster.getNamesystem(0); LightWeightCache cacheSet = (LightWeightCache) fsn0.getRetryCache().getCacheSet(); - assertEquals("Retry cache size is wrong", 26, cacheSet.size()); + assertEquals("Retry cache size is wrong", 34, cacheSet.size()); Map oldEntries = new HashMap(); @@ -187,7 +187,7 @@ public void testRetryCacheOnStandbyNN() throws Exception { FSNamesystem fsn1 = cluster.getNamesystem(1); cacheSet = (LightWeightCache) fsn1 .getRetryCache().getCacheSet(); - assertEquals("Retry cache size is wrong", 26, cacheSet.size()); + assertEquals("Retry cache size is wrong", 34, cacheSet.size()); iter = cacheSet.iterator(); while (iter.hasNext()) { CacheEntry entry = iter.next(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index b983fd16262..077a5f898a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -137,9 +137,6 @@ public class TestNameNodeMetrics { // Enable stale DataNodes checking CONF.setBoolean( DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true); - // Enable erasure coding - CONF.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - EC_POLICY.getName()); GenericTestUtils.setLogLevel(LogFactory.getLog(MetricsAsserts.class), Level.DEBUG); } @@ -166,6 +163,7 @@ public void setUp() throws Exception { namesystem = cluster.getNamesystem(); bm = namesystem.getBlockManager(); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy(EC_POLICY.getName()); ecDir = getTestPath("/ec"); fs.mkdirs(ecDir); fs.setErasureCodingPolicy(ecDir, EC_POLICY.getName()); @@ -449,7 +447,7 @@ private void verifyAggregatedMetricsTally() throws Exception { assertEquals("Pending deletion blocks metrics not matching!", namesystem.getPendingDeletionBlocks(), namesystem.getPendingDeletionReplicatedBlocks() + - namesystem.getPendingDeletionECBlockGroups()); + namesystem.getPendingDeletionECBlocks()); } /** Corrupt a block and ensure metrics reflects it */ @@ -891,7 +889,7 @@ public void testTransactionAndCheckpointMetrics() throws Exception { public void testSyncAndBlockReportMetric() throws Exception { MetricsRecordBuilder rb = getMetrics(NN_METRICS); // We have one sync when the cluster starts up, just opening the journal - assertCounter("SyncsNumOps", 3L, 
rb); + assertCounter("SyncsNumOps", 4L, rb); // Each datanode reports in when the cluster comes up assertCounter("StorageBlockReportNumOps", (long) DATANODE_COUNT * cluster.getStoragesPerDatanode(), rb); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java index 9b8260814a6..ae8f585e2c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java @@ -537,8 +537,6 @@ public void testReportCommand() throws Exception { final Configuration dfsConf = new HdfsConfiguration(); ErasureCodingPolicy ecPolicy = SystemErasureCodingPolicies.getByID( SystemErasureCodingPolicies.XOR_2_1_POLICY_ID); - dfsConf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); dfsConf.setInt( DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 500); dfsConf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1); @@ -568,6 +566,7 @@ public void testReportCommand() throws Exception { final long fileLength = 512L; final DistributedFileSystem fs = miniCluster.getFileSystem(); final Path file = new Path(baseDir, "/corrupted"); + fs.enableErasureCodingPolicy(ecPolicy.getName()); DFSTestUtil.createFile(fs, file, fileLength, replFactor, 12345L); DFSTestUtil.waitReplication(fs, file, replFactor); final ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file); @@ -781,9 +780,9 @@ private void verifyNodesAndCorruptBlocks( assertEquals(numCorruptBlocks + numCorruptECBlockGroups, client.getCorruptBlocksCount()); assertEquals(numCorruptBlocks, client.getNamenode() - .getBlocksStats().getCorruptBlocksStat()); + .getReplicatedBlockStats().getCorruptBlocks()); assertEquals(numCorruptECBlockGroups, client.getNamenode() - .getECBlockGroupsStats().getCorruptBlockGroupsStat()); + .getECBlockGroupStats().getCorruptBlockGroups()); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java index bbad73c0418..9e1fa79a52f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java @@ -182,7 +182,7 @@ public void testStored() throws IOException { hasAllOpCodes(editsStored)); assertTrue("Reference XML edits and parsed to XML should be same", FileUtils.contentEqualsIgnoreEOL(new File(editsStoredXml), - new File(editsStoredParsedXml), "UTF-8")); + new File(editsStoredParsedXml), "UTF-8")); assertTrue( "Reference edits and reparsed (bin to XML to bin) should be same", filesEqualIgnoreTrailingZeros(editsStored, editsStoredReparsed)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java index 46f194107ce..b32b308958b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java @@ -137,11 +137,10 @@ public static void createOriginalFSImage() throws IOException { conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true); conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL, "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT"); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - ecPolicy.getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); cluster.waitActive(); DistributedFileSystem hdfs = cluster.getFileSystem(); + hdfs.enableErasureCodingPolicy(ecPolicy.getName()); // Create a reasonable namespace for (int i = 0; i < NUM_DIRS; i++, dirCount++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerWithStripedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerWithStripedBlocks.java index d04ef99d630..187b297b42d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerWithStripedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewerWithStripedBlocks.java @@ -61,13 +61,13 @@ public void setup() throws IOException { int numDNs = dataBlocks + parityBlocks + 2; Configuration conf = new Configuration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - StripedFileTestUtil.getDefaultECPolicy().getName()); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); cluster.waitActive(); cluster.getFileSystem().getClient().setErasureCodingPolicy("/", StripedFileTestUtil.getDefaultECPolicy().getName()); fs = cluster.getFileSystem(); + fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); Path eczone = new Path("/eczone"); fs.mkdirs(eczone); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java index 60d90fb37b4..de051b3d5d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java @@ -29,7 +29,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.PrintWriter; import java.net.HttpURLConnection; import java.net.InetSocketAddress; import java.net.SocketException; @@ -39,16 +38,8 @@ import java.net.URL; import java.security.PrivilegedExceptionAction; import java.util.Arrays; -import java.util.List; -import java.util.Map; import java.util.Random; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import javax.ws.rs.core.MediaType; - import com.google.common.collect.ImmutableList; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; @@ -80,12 +71,8 @@ import org.apache.hadoop.hdfs.TestFileCreation; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import 
org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; @@ -96,8 +83,6 @@ import org.apache.hadoop.hdfs.web.resources.NoRedirectParam; import org.apache.hadoop.hdfs.web.resources.OffsetParam; import org.apache.hadoop.hdfs.web.resources.Param; -import org.apache.hadoop.http.HttpServer2; -import org.apache.hadoop.http.HttpServerFunctionalTest; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryPolicy.RetryAction; import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision; @@ -114,12 +99,8 @@ import org.junit.Test; import org.mockito.internal.util.reflection.Whitebox; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.type.MapType; - import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyInt; -import static org.mockito.Matchers.anyLong; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.spy; @@ -519,13 +500,12 @@ public void testWebHdfsAllowandDisallowSnapshots() throws Exception { public void testWebHdfsErasureCodingFiles() throws Exception { MiniDFSCluster cluster = null; final Configuration conf = WebHdfsTestUtil.createConf(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - SystemErasureCodingPolicies.getByID( - SystemErasureCodingPolicies.XOR_2_1_POLICY_ID).getName()); try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); cluster.waitActive(); final DistributedFileSystem dfs = cluster.getFileSystem(); + dfs.enableErasureCodingPolicy(SystemErasureCodingPolicies.getByID( + SystemErasureCodingPolicies.XOR_2_1_POLICY_ID).getName()); final WebHdfsFileSystem webHdfs = WebHdfsTestUtil .getWebHdfsFileSystem(conf, WebHdfsConstants.WEBHDFS_SCHEME); @@ -975,76 +955,6 @@ public void testWebHdfsGetBlockLocationsWithStorageType() throws Exception{ Assert.assertTrue(storageTypes != null && storageTypes.length > 0 && storageTypes[0] == StorageType.DISK); } - - // Query webhdfs REST API to get block locations - InetSocketAddress addr = cluster.getNameNode().getHttpAddress(); - - // Case 1 - // URL without length or offset parameters - URL url1 = new URL("http", addr.getHostString(), addr.getPort(), - WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS"); - LOG.info("Sending GETFILEBLOCKLOCATIONS request " + url1); - - String response1 = getResponse(url1, "GET"); - LOG.info("The output of GETFILEBLOCKLOCATIONS request " + response1); - // Parse BlockLocation array from json output using object mapper - BlockLocation[] locationArray1 = toBlockLocationArray(response1); - - // Verify the result from rest call is same as file system api - verifyEquals(locations, locationArray1); - - // Case 2 - // URL contains length and offset parameters - URL url2 = new URL("http", addr.getHostString(), addr.getPort(), - WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS" - + "&length=" + LENGTH + "&offset=" + OFFSET); - LOG.info("Sending GETFILEBLOCKLOCATIONS request " + url2); - - String response2 = getResponse(url2, "GET"); - LOG.info("The output of 
GETFILEBLOCKLOCATIONS request " + response2); - BlockLocation[] locationArray2 = toBlockLocationArray(response2); - - verifyEquals(locations, locationArray2); - - // Case 3 - // URL contains length parameter but without offset parameters - URL url3 = new URL("http", addr.getHostString(), addr.getPort(), - WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS" - + "&length=" + LENGTH); - LOG.info("Sending GETFILEBLOCKLOCATIONS request " + url3); - - String response3 = getResponse(url3, "GET"); - LOG.info("The output of GETFILEBLOCKLOCATIONS request " + response3); - BlockLocation[] locationArray3 = toBlockLocationArray(response3); - - verifyEquals(locations, locationArray3); - - // Case 4 - // URL contains offset parameter but without length parameter - URL url4 = new URL("http", addr.getHostString(), addr.getPort(), - WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS" - + "&offset=" + OFFSET); - LOG.info("Sending GETFILEBLOCKLOCATIONS request " + url4); - - String response4 = getResponse(url4, "GET"); - LOG.info("The output of GETFILEBLOCKLOCATIONS request " + response4); - BlockLocation[] locationArray4 = toBlockLocationArray(response4); - - verifyEquals(locations, locationArray4); - - // Case 5 - // URL specifies offset exceeds the file length - URL url5 = new URL("http", addr.getHostString(), addr.getPort(), - WebHdfsFileSystem.PATH_PREFIX + "/foo?op=GETFILEBLOCKLOCATIONS" - + "&offset=1200"); - LOG.info("Sending GETFILEBLOCKLOCATIONS request " + url5); - - String response5 = getResponse(url5, "GET"); - LOG.info("The output of GETFILEBLOCKLOCATIONS request " + response5); - BlockLocation[] locationArray5 = toBlockLocationArray(response5); - - // Expected an empty array of BlockLocation - verifyEquals(new BlockLocation[] {}, locationArray5); } finally { if (cluster != null) { cluster.shutdown(); @@ -1052,66 +962,6 @@ public void testWebHdfsGetBlockLocationsWithStorageType() throws Exception{ } } - private BlockLocation[] toBlockLocationArray(String json) - throws IOException { - ObjectMapper mapper = new ObjectMapper(); - MapType subType = mapper.getTypeFactory().constructMapType( - Map.class, - String.class, - BlockLocation[].class); - MapType rootType = mapper.getTypeFactory().constructMapType( - Map.class, - mapper.constructType(String.class), - mapper.constructType(subType)); - - Map> jsonMap = mapper - .readValue(json, rootType); - Map locationMap = jsonMap - .get("BlockLocations"); - BlockLocation[] locationArray = locationMap.get( - BlockLocation.class.getSimpleName()); - return locationArray; - } - - private void verifyEquals(BlockLocation[] locations1, - BlockLocation[] locations2) throws IOException { - for(int i=0; i - * First time call it return a wrapped json response with a - * IllegalArgumentException - *

- * Second time call it return a valid GET_BLOCK_LOCATIONS - * json response - *

- * Third time call it return a wrapped json response with - * a random IOException - * - */ - public static class MockWebHdfsServlet extends HttpServlet { - - private static final long serialVersionUID = 1L; - private static int respondTimes = 0; - private static final String RANDOM_EXCEPTION_MSG = - "This is a random exception"; - - @Override - public void doGet(HttpServletRequest request, - HttpServletResponse response) throws ServletException, IOException { - response.setHeader("Content-Type", - MediaType.APPLICATION_JSON); - String param = request.getParameter("op"); - if(respondTimes == 0) { - Exception mockException = new IllegalArgumentException( - "Invalid value for webhdfs parameter \"op\". " - + "" + "No enum constant " + param); - sendException(request, response, mockException); - } else if (respondTimes == 1) { - sendResponse(request, response); - } else if (respondTimes == 2) { - Exception mockException = new IOException(RANDOM_EXCEPTION_MSG); - sendException(request, response, mockException); - } - respondTimes++; - } - - private void sendResponse(HttpServletRequest request, - HttpServletResponse response) throws IOException { - response.setStatus(HttpServletResponse.SC_OK); - // Construct a LocatedBlock for testing - DatanodeInfo d = DFSTestUtil.getLocalDatanodeInfo(); - DatanodeInfo[] ds = new DatanodeInfo[1]; - ds[0] = d; - ExtendedBlock b1 = new ExtendedBlock("bpid", 1, 121, 1); - LocatedBlock l1 = new LocatedBlock(b1, ds); - l1.setStartOffset(0); - l1.setCorrupt(false); - List ls = Arrays.asList(l1); - LocatedBlocks locatedblocks = - new LocatedBlocks(10, false, ls, l1, - true, null, null); - - try (PrintWriter pw = response.getWriter()) { - pw.write(JsonUtil.toJsonString(locatedblocks)); - } - } - - private void sendException(HttpServletRequest request, - HttpServletResponse response, - Exception mockException) throws IOException { - response.setStatus(HttpServletResponse.SC_BAD_REQUEST); - String errJs = JsonUtil.toJsonString(mockException); - try (PrintWriter pw = response.getWriter()) { - pw.write(errJs); - } - } - } - - @Test - public void testGetFileBlockLocationsBackwardsCompatibility() - throws Exception { - final Configuration conf = WebHdfsTestUtil.createConf(); - final String pathSpec = WebHdfsFileSystem.PATH_PREFIX + "/*"; - HttpServer2 http = null; - try { - http = HttpServerFunctionalTest.createTestServer(conf); - http.addServlet("test", pathSpec, MockWebHdfsServlet.class); - http.start(); - - // Write the address back to configuration so - // WebHdfsFileSystem could connect to the mock server - conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, - "localhost:" + http.getConnectorAddress(0).getPort()); - - final WebHdfsFileSystem webFS = WebHdfsTestUtil.getWebHdfsFileSystem( - conf, WebHdfsConstants.WEBHDFS_SCHEME); - - WebHdfsFileSystem spyFs = spy(webFS); - BlockLocation[] locations = spyFs - .getFileBlockLocations(new Path("p"), 0, 100); - - // Verify result - assertEquals(1, locations.length); - assertEquals(121, locations[0].getLength()); - - // Verify the fall back - // The function should be called exactly 2 times - // 1st time handles GETFILEBLOCKLOCATIONS and found it is not supported - // 2nd time fall back to handle GET_FILE_BLOCK_LOCATIONS - verify(spyFs, times(2)).getFileBlockLocations(any(), - any(), anyLong(), anyLong()); - - // Verify it doesn't erroneously fall back - // When server returns a different error, it should directly - // throw an exception. 
- try { - spyFs.getFileBlockLocations(new Path("p"), 0, 100); - } catch (Exception e) { - assertTrue(e instanceof IOException); - assertEquals(e.getMessage(), MockWebHdfsServlet.RANDOM_EXCEPTION_MSG); - // Totally this function has been called 3 times - verify(spyFs, times(3)).getFileBlockLocations(any(), - any(), anyLong(), anyLong()); - } - } finally { - if(http != null) { - http.stop(); - } - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index e271cb574bd..80295750f11 100644 Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored differ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml index f9011450555..0a1c25e4712 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml @@ -1179,23 +1179,107 @@ - OP_ROLLING_UPGRADE_START + OP_ADD_ERASURE_CODING_POLICY 89 + rs + 3 + 2 + 8192 + 0 + 7334ec24-dd6b-4efd-807d-ed0d18625534 + 84 + + + + OP_ADD_ERASURE_CODING_POLICY + + 90 + rs + 6 + 10 + 4096 + 1 + + dummyKey + dummyValue + + 7334ec24-dd6b-4efd-807d-ed0d18625534 + 85 + + + + OP_ENABLE_ERASURE_CODING_POLICY + + 91 + RS-3-2-8k + 7334ec24-dd6b-4efd-807d-ed0d18625534 + 86 + + + + OP_ENABLE_ERASURE_CODING_POLICY + + 92 + RS-6-10-4k + 7334ec24-dd6b-4efd-807d-ed0d18625534 + 87 + + + + OP_DISABLE_ERASURE_CODING_POLICY + + 93 + RS-3-2-8k + 7334ec24-dd6b-4efd-807d-ed0d18625534 + 88 + + + + OP_DISABLE_ERASURE_CODING_POLICY + + 94 + RS-6-10-4k + 7334ec24-dd6b-4efd-807d-ed0d18625534 + 89 + + + + OP_REMOVE_ERASURE_CODING_POLICY + + 95 + RS-3-2-8k + 7334ec24-dd6b-4efd-807d-ed0d18625534 + 90 + + + + OP_REMOVE_ERASURE_CODING_POLICY + + 96 + RS-6-10-4k + 7334ec24-dd6b-4efd-807d-ed0d18625534 + 91 + + + + OP_ROLLING_UPGRADE_START + + 97 1422406383706 OP_ROLLING_UPGRADE_FINALIZE - 90 + 98 1422406383706 OP_END_LOG_SEGMENT - 91 + 99 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml index c34f7bd32f8..ce5fdc86111 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml @@ -42,6 +42,10 @@ org.apache.hadoop hadoop-yarn-common + + org.apache.hadoop + hadoop-hdfs-client + org.apache.hadoop hadoop-hdfs diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java index b0347fd3207..542e956582a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java @@ -887,7 +887,7 @@ public void startCommunicationThread() { } public void stopCommunicationThread() throws InterruptedException { if (pingThread != null) { - // Intent of the lock is to not send an interupt in the middle of an + // Intent of the lock is to not send an interrupt in the middle of an // 
umbilical.ping or umbilical.statusUpdate synchronized(lock) { //Interrupt if sleeping. Otherwise wait for the RPC call to return. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java index f0f3652beb1..3ef6601fbfe 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java @@ -27,7 +27,7 @@ import java.util.*; /** - * The MultipleOutputs class simplifies writting to additional outputs other + * The MultipleOutputs class simplifies writing to additional outputs other * than the job default output via the OutputCollector passed to * the map() and reduce() methods of the * Mapper and Reducer implementations. @@ -36,7 +36,7 @@ * OutputFormat, with its own key class and with its own value * class. *

- * A named output can be a single file or a multi file. The later is refered as + * A named output can be a single file or a multi file. The later is referred as * a multi named output. *

* A multi named output is an unbound set of files all sharing the same diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobResourceUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobResourceUploader.java index f1cad57dd41..d9bf988f9b8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobResourceUploader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobResourceUploader.java @@ -36,6 +36,8 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies; import org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager; import org.apache.hadoop.mapreduce.filecache.DistributedCache; @@ -94,6 +96,11 @@ public void uploadResources(Job job, Path submitJobDir) throws IOException { new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION); mkdirs(jtFs, submitJobDir, mapredSysPerms); + if (!conf.getBoolean(MRJobConfig.MR_AM_STAGING_DIR_ERASURECODING_ENABLED, + MRJobConfig.DEFAULT_MR_AM_STAGING_ERASURECODING_ENABLED)) { + disableErasureCodingForPath(jtFs, submitJobDir); + } + Collection files = conf.getStringCollection("tmpfiles"); Collection libjars = conf.getStringCollection("tmpjars"); Collection archives = conf.getStringCollection("tmparchives"); @@ -575,4 +582,14 @@ private String validateFilePath(String file, Configuration conf) } return finalPath; } + + private void disableErasureCodingForPath(FileSystem fs, Path path) + throws IOException { + if (jtFs instanceof DistributedFileSystem) { + LOG.info("Disabling Erasure Coding for path: " + path); + DistributedFileSystem dfs = (DistributedFileSystem) jtFs; + dfs.setErasureCodingPolicy(path, + SystemErasureCodingPolicies.getReplicationPolicy().getName()); + } + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 2023ba3b1d2..86abb42983a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -1037,4 +1037,9 @@ public interface MRJobConfig { String FINISH_JOB_WHEN_REDUCERS_DONE = "mapreduce.job.finish-when-all-reducers-done"; boolean DEFAULT_FINISH_JOB_WHEN_REDUCERS_DONE = true; + + String MR_AM_STAGING_DIR_ERASURECODING_ENABLED = + MR_AM_STAGING_DIR + "erasurecoding.enabled"; + + boolean DEFAULT_MR_AM_STAGING_ERASURECODING_ENABLED = false; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java index 9e750be2feb..0061406f7ea 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputCommitter.java @@ -39,13 +39,14 @@ import org.apache.hadoop.mapreduce.TaskAttemptID; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; /** An {@link OutputCommitter} that commits files specified * in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}. **/ @InterfaceAudience.Public @InterfaceStability.Stable -public class FileOutputCommitter extends OutputCommitter { +public class FileOutputCommitter extends PathOutputCommitter { private static final Log LOG = LogFactory.getLog(FileOutputCommitter.class); /** @@ -101,8 +102,11 @@ public class FileOutputCommitter extends OutputCommitter { public FileOutputCommitter(Path outputPath, TaskAttemptContext context) throws IOException { this(outputPath, (JobContext)context); - if (outputPath != null) { - workPath = getTaskAttemptPath(context, outputPath); + if (getOutputPath() != null) { + workPath = Preconditions.checkNotNull( + getTaskAttemptPath(context, getOutputPath()), + "Null task attempt path in %s and output path %s", + context, outputPath); } } @@ -116,6 +120,7 @@ public FileOutputCommitter(Path outputPath, @Private public FileOutputCommitter(Path outputPath, JobContext context) throws IOException { + super(outputPath, context); Configuration conf = context.getConfiguration(); algorithmVersion = conf.getInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, @@ -705,4 +710,18 @@ public void recoverTask(TaskAttemptContext context) LOG.warn("Output Path is null in recoverTask()"); } } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "FileOutputCommitter{"); + sb.append(super.toString()).append("; "); + sb.append("outputPath=").append(outputPath); + sb.append(", workPath=").append(workPath); + sb.append(", algorithmVersion=").append(algorithmVersion); + sb.append(", skipCleanup=").append(skipCleanup); + sb.append(", ignoreCleanupFailures=").append(ignoreCleanupFailures); + sb.append('}'); + return sb.toString(); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java index c11f8d86d56..0e7efa3941e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.text.NumberFormat; +import com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -38,11 +39,15 @@ import org.apache.hadoop.mapreduce.TaskID; import org.apache.hadoop.mapreduce.TaskInputOutputContext; import org.apache.hadoop.mapreduce.security.TokenCache; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** A base class for {@link OutputFormat}s 
that read from {@link FileSystem}s.*/ @InterfaceAudience.Public @InterfaceStability.Stable public abstract class FileOutputFormat extends OutputFormat { + private static final Logger LOG = + LoggerFactory.getLogger(FileOutputFormat.class); /** Construct output file names so that, when an output directory listing is * sorted lexicographically, positions correspond to output partitions.*/ @@ -53,12 +58,25 @@ public abstract class FileOutputFormat extends OutputFormat { NUMBER_FORMAT.setMinimumIntegerDigits(5); NUMBER_FORMAT.setGroupingUsed(false); } - private FileOutputCommitter committer = null; -public static final String COMPRESS ="mapreduce.output.fileoutputformat.compress"; -public static final String COMPRESS_CODEC = -"mapreduce.output.fileoutputformat.compress.codec"; -public static final String COMPRESS_TYPE = "mapreduce.output.fileoutputformat.compress.type"; -public static final String OUTDIR = "mapreduce.output.fileoutputformat.outputdir"; + private PathOutputCommitter committer = null; + + /** Configuration option: should output be compressed? {@value}. */ + public static final String COMPRESS = + "mapreduce.output.fileoutputformat.compress"; + + /** If compression is enabled, name of codec: {@value}. */ + public static final String COMPRESS_CODEC = + "mapreduce.output.fileoutputformat.compress.codec"; + /** + * Type of compression {@value}: NONE, RECORD, BLOCK. + * Generally only used in {@code SequenceFileOutputFormat}. + */ + public static final String COMPRESS_TYPE = + "mapreduce.output.fileoutputformat.compress.type"; + + /** Destination directory of work: {@value}. */ + public static final String OUTDIR = + "mapreduce.output.fileoutputformat.outputdir"; @Deprecated public enum Counter { @@ -110,14 +128,14 @@ public static boolean getCompressOutput(JobContext job) { */ public static Class getOutputCompressorClass(JobContext job, - Class defaultValue) { + Class defaultValue) { Class codecClass = defaultValue; Configuration conf = job.getConfiguration(); String name = conf.get(FileOutputFormat.COMPRESS_CODEC); if (name != null) { try { - codecClass = - conf.getClassByName(name).asSubclass(CompressionCodec.class); + codecClass = + conf.getClassByName(name).asSubclass(CompressionCodec.class); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Compression codec " + name + " was not found.", e); @@ -219,9 +237,11 @@ public static Path getOutputPath(JobContext job) { public static Path getWorkOutputPath(TaskInputOutputContext context ) throws IOException, InterruptedException { - FileOutputCommitter committer = (FileOutputCommitter) + PathOutputCommitter committer = (PathOutputCommitter) context.getOutputCommitter(); - return committer.getWorkPath(); + Path workPath = committer.getWorkPath(); + LOG.debug("Work path is {}", workPath); + return workPath; } /** @@ -281,10 +301,17 @@ public synchronized static String getUniqueFile(TaskAttemptContext context, */ public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException{ - FileOutputCommitter committer = - (FileOutputCommitter) getOutputCommitter(context); - return new Path(committer.getWorkPath(), getUniqueFile(context, - getOutputName(context), extension)); + OutputCommitter c = getOutputCommitter(context); + Preconditions.checkState(c instanceof PathOutputCommitter, + "Committer %s is not a PathOutputCommitter", c); + Path workPath = ((PathOutputCommitter) c).getWorkPath(); + Preconditions.checkNotNull(workPath, + "Null workPath returned by committer %s", c); + Path workFile 
= new Path(workPath, + getUniqueFile(context, getOutputName(context), extension)); + LOG.debug("Work file for {} extension '{}' is {}", + context, extension, workFile); + return workFile; } /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/PathOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/PathOutputCommitter.java new file mode 100644 index 00000000000..2df30bac951 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/PathOutputCommitter.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.lib.output; + +import java.io.IOException; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * A committer which somehow commits data written to a working directory + * to the final directory during the commit process. The reference + * implementation of this is the {@link FileOutputCommitter}. + * + * There are two constructors, both of which do nothing but long and + * validate their arguments. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class PathOutputCommitter extends OutputCommitter { + private static final Logger LOG = + LoggerFactory.getLogger(PathOutputCommitter.class); + + private final JobContext context; + + /** + * Constructor for a task attempt. + * Subclasses should provide a public constructor with this signature. + * @param outputPath output path: may be null + * @param context task context + * @throws IOException IO problem + */ + protected PathOutputCommitter(Path outputPath, + TaskAttemptContext context) throws IOException { + this.context = Preconditions.checkNotNull(context, "Null context"); + LOG.debug("Creating committer with output path {} and task context" + + " {}", outputPath, context); + } + + /** + * Constructor for a job attempt. + * Subclasses should provide a public constructor with this signature. 
+ * @param outputPath output path: may be null + * @param context task context + * @throws IOException IO problem + */ + protected PathOutputCommitter(Path outputPath, + JobContext context) throws IOException { + this.context = Preconditions.checkNotNull(context, "Null context"); + LOG.debug("Creating committer with output path {} and job context" + + " {}", outputPath, context); + } + + /** + * Get the directory that the task should write results into. + * Warning: there's no guarantee that this work path is on the same + * FS as the final output, or that it's visible across machines. + * @return the work directory + * @throws IOException IO problem + */ + public abstract Path getWorkPath() throws IOException; + + @Override + public String toString() { + return "PathOutputCommitter{context=" + context + '}'; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java index b9014ef7861..1696246b843 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java @@ -451,5 +451,13 @@ public String getUser() { public Credentials getCredentials() { return credentials; } - + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "JobContextImpl{"); + sb.append("jobId=").append(jobId); + sb.append('}'); + return sb.toString(); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/TaskAttemptContextImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/TaskAttemptContextImpl.java index 333f57b4264..a622d3aa75f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/TaskAttemptContextImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/TaskAttemptContextImpl.java @@ -118,4 +118,15 @@ public float getProgress() { public float getProgress() { return reporter.getProgress(); } -} \ No newline at end of file + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "TaskAttemptContextImpl{"); + sb.append(super.toString()); + sb.append("; taskId=").append(taskId); + sb.append(", status='").append(status).append('\''); + sb.append('}'); + return sb.toString(); + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java index 6112fb53f91..ce1551b3215 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java @@ -278,9 +278,6 
@@ private DataInputStream openShuffleUrl(MapHost host, LOG.warn("Connection rejected by the host " + te.host + ". Will retry later."); scheduler.penalize(host, te.backoff); - for (TaskAttemptID left : remaining) { - scheduler.putBackKnownMapOutput(host, left); - } } catch (IOException ie) { boolean connectExcpt = ie instanceof ConnectException; ioErrs.increment(1); @@ -293,11 +290,6 @@ private DataInputStream openShuffleUrl(MapHost host, for(TaskAttemptID left: remaining) { scheduler.copyFailed(left, host, false, connectExcpt); } - - // Add back all the remaining maps, WITHOUT marking them as failed - for(TaskAttemptID left: remaining) { - scheduler.putBackKnownMapOutput(host, left); - } } return input; @@ -332,12 +324,14 @@ protected void copyFromHost(MapHost host) throws IOException { // Construct the url and connect URL url = getMapOutputURL(host, maps); - DataInputStream input = openShuffleUrl(host, remaining, url); - if (input == null) { - return; - } + DataInputStream input = null; try { + input = openShuffleUrl(host, remaining, url); + if (input == null) { + return; + } + // Loop through available map-outputs and fetch them // On any error, faildTasks is not null and we exit // after putting back the remaining maps to the diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java index 2b6dc57c349..d9ce32cefa1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java @@ -217,6 +217,9 @@ public synchronized void copySucceeded(TaskAttemptID mapId, reduceShuffleBytes.increment(bytes); lastProgressTime = Time.monotonicNow(); LOG.debug("map " + mapId + " done " + status.getStateString()); + } else { + LOG.warn("Aborting already-finished MapOutput for " + mapId); + output.abort(); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index ee9b906faa9..6b6faf20329 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1260,6 +1260,15 @@ + + yarn.app.mapreduce.am.staging-dir.erasurecoding.enabled + false + Whether Erasure Coding should be enabled for + files that are copied to the MR staging area. This is a job-level + setting. 
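(Illustrative sketch, not part of the patch: because the new flag described above is job-level, a client that wants its staging-area files to remain erasure-coded can opt in on the job configuration before submission. The class name and job name below are placeholders; the constant comes from the MRJobConfig addition earlier in this patch, and the rest is standard MapReduce client API.)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class StagingDirEcOptIn {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Job-level opt-in: when true, JobResourceUploader leaves the job submit
    // directory's erasure coding policy alone; when false (the default) it
    // forces the replication policy on that directory.
    conf.setBoolean(MRJobConfig.MR_AM_STAGING_DIR_ERASURECODING_ENABLED, true);
    Job job = Job.getInstance(conf, "staging-dir-ec-opt-in");
    // ... configure mapper, reducer, input and output as usual ...
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}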
+ + + mapreduce.am.max-attempts 2 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java index 20b7b7dcfba..d0d7a349323 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java @@ -20,6 +20,11 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.times; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; import java.io.IOException; import java.net.URI; @@ -36,9 +41,12 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies; import org.apache.hadoop.mapred.JobConf; import org.junit.Assert; import org.junit.Test; +import org.mockito.verification.VerificationMode; /** * A class for unit testing JobResourceUploader. @@ -357,6 +365,40 @@ public void testPathsWithFragsAndWildCard() throws IOException { expectedArchivesWithFrags, expectedJobJar); } + @Test + public void testErasureCodingDefault() throws IOException { + testErasureCodingSetting(true); + } + + @Test + public void testErasureCodingDisabled() throws IOException { + testErasureCodingSetting(false); + } + + private void testErasureCodingSetting(boolean defaultBehavior) + throws IOException { + JobConf jConf = new JobConf(); + // don't set to false if EC remains disabled to check default setting + if (!defaultBehavior) { + jConf.setBoolean(MRJobConfig.MR_AM_STAGING_DIR_ERASURECODING_ENABLED, + true); + } + + DistributedFileSystem fs = mock(DistributedFileSystem.class); + Path path = new Path("/"); + when(fs.makeQualified(any(Path.class))).thenReturn(path); + JobResourceUploader uploader = new StubedUploader(fs, true); + Job job = Job.getInstance(jConf); + + uploader.uploadResources(job, new Path("/test")); + + String replicationPolicyName = SystemErasureCodingPolicies + .getReplicationPolicy().getName(); + VerificationMode mode = defaultBehavior ? 
times(1) : never(); + verify(fs, mode).setErasureCodingPolicy(eq(path), + eq(replicationPolicyName)); + } + private void runTmpResourcePathTest(JobResourceUploader uploader, ResourceConf rConf, JobConf jConf, String[] expectedFiles, String[] expectedArchives, String expectedJobJar) throws IOException { @@ -698,6 +740,10 @@ private class StubedUploader extends JobResourceUploader { super(FileSystem.getLocal(conf), useWildcard); } + StubedUploader(FileSystem fs, boolean useWildcard) throws IOException { + super(fs, useWildcard); + } + @Override FileStatus getFileStatus(Map statCache, Configuration job, Path p) throws IOException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestPathOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestPathOutputCommitter.java new file mode 100644 index 00000000000..9cff82fc10b --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestPathOutputCommitter.java @@ -0,0 +1,377 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.lib.output; + +import java.io.IOException; +import java.net.URI; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.RawComparator; +import org.apache.hadoop.mapreduce.Counter; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.Partitioner; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.TaskInputOutputContext; +import org.apache.hadoop.security.Credentials; + +/** + * Test the path output committer binding to FileOutputFormat. 
+ */ +public class TestPathOutputCommitter extends Assert { + + @Test + public void testFileOutputCommitterOverrride() throws Throwable { + TaskContext context = new TaskContext(); + Path workPath = new Path("file:///work"); + context.setOutputCommitter( + new SimpleCommitter(new Path("/"), context, workPath)); + assertEquals(workPath, FileOutputFormat.getWorkOutputPath(context)); + } + + @Test + public void testFileOutputCommitterNullWorkPath() throws Throwable { + TaskContext context = new TaskContext(); + context.setOutputCommitter( + new SimpleCommitter(new Path("/"), context, null)); + assertNull(FileOutputFormat.getWorkOutputPath(context)); + } + + private static class SimpleCommitter extends PathOutputCommitter { + + private final Path workPath; + + SimpleCommitter(Path outputPath, + TaskAttemptContext context, Path workPath) throws IOException { + super(outputPath, context); + this.workPath = workPath; + } + + SimpleCommitter(Path outputPath, + JobContext context, Path workPath) throws IOException { + super(outputPath, context); + this.workPath = workPath; + } + + @Override + public Path getWorkPath() throws IOException { + return workPath; + } + + @Override + public void setupJob(JobContext jobContext) throws IOException { + + } + + @Override + public void setupTask(TaskAttemptContext taskContext) throws IOException { + + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) + throws IOException { + return false; + } + + @Override + public void commitTask(TaskAttemptContext taskContext) throws IOException { + + } + + @Override + public void abortTask(TaskAttemptContext taskContext) throws IOException { + + } + } + + /** + * Stub task context. + */ + public class TaskContext + implements TaskInputOutputContext { + + private OutputCommitter outputCommitter; + + public void setOutputCommitter(OutputCommitter outputCommitter) { + this.outputCommitter = outputCommitter; + } + + @Override + public OutputCommitter getOutputCommitter() { + return outputCommitter; + } + + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + return false; + } + + @Override + public String getCurrentKey() throws IOException, InterruptedException { + return null; + } + + @Override + public String getCurrentValue() throws IOException, InterruptedException { + return null; + } + + @Override + public void write(String key, String value) + throws IOException, InterruptedException { + } + + + @Override + public TaskAttemptID getTaskAttemptID() { + return null; + } + + @Override + public void setStatus(String msg) { + } + + @Override + public String getStatus() { + return null; + } + + @Override + public float getProgress() { + return 0; + } + + @Override + public Counter getCounter(Enum counterName) { + return null; + } + + @Override + public Counter getCounter(String groupName, String counterName) { + return null; + } + + @Override + public Configuration getConfiguration() { + return null; + } + + @Override + public Credentials getCredentials() { + return null; + } + + @Override + public JobID getJobID() { + return null; + } + + @Override + public int getNumReduceTasks() { + return 0; + } + + @Override + public Path getWorkingDirectory() throws IOException { + return null; + } + + @Override + public Class getOutputKeyClass() { + return null; + } + + @Override + public Class getOutputValueClass() { + return null; + } + + @Override + public Class getMapOutputKeyClass() { + return null; + } + + @Override + public Class getMapOutputValueClass() { + return 
null; + } + + @Override + public String getJobName() { + return null; + } + + @Override + public Class> getInputFormatClass() + throws ClassNotFoundException { + return null; + } + + @Override + public Class> getMapperClass() + throws ClassNotFoundException { + return null; + } + + @Override + public Class> getCombinerClass() + throws ClassNotFoundException { + return null; + } + + @Override + public Class> getReducerClass() + throws ClassNotFoundException { + return null; + } + + @Override + public Class> getOutputFormatClass() + throws ClassNotFoundException { + return null; + } + + @Override + public Class> getPartitionerClass() + throws ClassNotFoundException { + return null; + } + + @Override + public RawComparator getSortComparator() { + return null; + } + + @Override + public String getJar() { + return null; + } + + @Override + public RawComparator getCombinerKeyGroupingComparator() { + return null; + } + + @Override + public RawComparator getGroupingComparator() { + return null; + } + + @Override + public boolean getJobSetupCleanupNeeded() { + return false; + } + + @Override + public boolean getTaskCleanupNeeded() { + return false; + } + + @Override + public boolean getProfileEnabled() { + return false; + } + + @Override + public String getProfileParams() { + return null; + } + + @Override + public Configuration.IntegerRanges getProfileTaskRange(boolean isMap) { + return null; + } + + @Override + public String getUser() { + return null; + } + + @Override + public boolean getSymlink() { + return false; + } + + @Override + public Path[] getArchiveClassPaths() { + return new Path[0]; + } + + @Override + public URI[] getCacheArchives() throws IOException { + return new URI[0]; + } + + @Override + public URI[] getCacheFiles() throws IOException { + return new URI[0]; + } + + @Override + public Path[] getLocalCacheArchives() throws IOException { + return new Path[0]; + } + + @Override + public Path[] getLocalCacheFiles() throws IOException { + return new Path[0]; + } + + @Override + public Path[] getFileClassPaths() { + return new Path[0]; + } + + @Override + public String[] getArchiveTimestamps() { + return new String[0]; + } + + @Override + public String[] getFileTimestamps() { + return new String[0]; + } + + @Override + public int getMaxMapAttempts() { + return 0; + } + + @Override + public int getMaxReduceAttempts() { + return 0; + } + + @Override + public void progress() { + } + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java index 01e51e9d070..934afd747e7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java @@ -25,9 +25,7 @@ import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.MapOutputFile; import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.TaskID; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -66,8 +64,6 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; 
-import com.nimbusds.jose.util.StringUtils; - /** * Test that the Fetcher does what we expect it to. */ @@ -475,7 +471,10 @@ public void testCopyFromHostWithRetryThenTimeout() throws Exception { underTest.copyFromHost(host); verify(allErrs).increment(1); - verify(ss).copyFailed(map1ID, host, false, false); + verify(ss, times(1)).copyFailed(map1ID, host, false, false); + verify(ss, times(1)).copyFailed(map2ID, host, false, false); + verify(ss, times(1)).putBackKnownMapOutput(any(MapHost.class), eq(map1ID)); + verify(ss, times(1)).putBackKnownMapOutput(any(MapHost.class), eq(map2ID)); } @Test diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestShuffleScheduler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestShuffleScheduler.java index 654b7488b98..5d0a0270e0a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestShuffleScheduler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestShuffleScheduler.java @@ -18,6 +18,8 @@ package org.apache.hadoop.mapreduce.task.reduce; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.io.compress.CompressionCodec; @@ -283,6 +285,84 @@ public void addFetchFailedMap(TaskAttemptID mapTaskId) { scheduler.copyFailed(failedAttemptID, host1, true, false); } + @Test + public void testDuplicateCopySucceeded() throws Exception { + JobConf job = new JobConf(); + job.setNumMapTasks(2); + //mock creation + TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class); + Reporter mockReporter = mock(Reporter.class); + FileSystem mockFileSystem = mock(FileSystem.class); + Class combinerClass = + job.getCombinerClass(); + @SuppressWarnings("unchecked") // needed for mock with generic + CombineOutputCollector mockCombineOutputCollector = + (CombineOutputCollector) mock(CombineOutputCollector.class); + org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = + mock(org.apache.hadoop.mapreduce.TaskAttemptID.class); + LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class); + CompressionCodec mockCompressionCodec = mock(CompressionCodec.class); + Counter mockCounter = mock(Counter.class); + TaskStatus mockTaskStatus = mock(TaskStatus.class); + Progress mockProgress = mock(Progress.class); + MapOutputFile mockMapOutputFile = mock(MapOutputFile.class); + Task mockTask = mock(Task.class); + @SuppressWarnings("unchecked") + MapOutput output1 = mock(MapOutput.class); + @SuppressWarnings("unchecked") + MapOutput output2 = mock(MapOutput.class); + @SuppressWarnings("unchecked") + MapOutput output3 = mock(MapOutput.class); + + ShuffleConsumerPlugin.Context context = + new ShuffleConsumerPlugin.Context( + mockTaskAttemptID, job, mockFileSystem, + mockUmbilical, mockLocalDirAllocator, + mockReporter, mockCompressionCodec, + combinerClass, mockCombineOutputCollector, + mockCounter, mockCounter, mockCounter, + mockCounter, mockCounter, mockCounter, + mockTaskStatus, mockProgress, mockProgress, + mockTask, mockMapOutputFile, null); + TaskStatus status = new TaskStatus() { + @Override + public boolean getIsMap() { + return false; + } + 
@Override + public void addFetchFailedMap(TaskAttemptID mapTaskId) { + } + }; + Progress progress = new Progress(); + ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, + status, null, null, progress, context.getShuffledMapsCounter(), + context.getReduceShuffleBytes(), context.getFailedShuffleCounter()); + + MapHost host1 = new MapHost("host1", null); + TaskAttemptID succeedAttempt1ID = new TaskAttemptID( + new org.apache.hadoop.mapred.TaskID( + new JobID("test", 0), TaskType.MAP, 0), 0); + TaskAttemptID succeedAttempt2ID = new TaskAttemptID( + new org.apache.hadoop.mapred.TaskID( + new JobID("test", 0), TaskType.MAP, 0), 1); + TaskAttemptID succeedAttempt3ID = new TaskAttemptID( + new org.apache.hadoop.mapred.TaskID( + new JobID("test", 0), TaskType.MAP, 1), 0); + + long bytes = (long)500 * 1024 * 1024; + //First successful copy for map 0 should commit output + scheduler.copySucceeded(succeedAttempt1ID, host1, bytes, 0, 1, output1); + verify(output1).commit(); + + //Second successful copy for map 0 should abort output + scheduler.copySucceeded(succeedAttempt2ID, host1, bytes, 0, 1, output2); + verify(output2).abort(); + + //First successful copy for map 1 should commit output + scheduler.copySucceeded(succeedAttempt3ID, host1, bytes, 0, 1, output3); + verify(output3).commit(); + } + private static String copyMessage(int attemptNo, double rate1, double rate2) { int attemptZero = attemptNo - 1; return String.format("copy task(attempt_test_0000_m_%06d_%d succeeded at %1.2f MB/s)" diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index 62aa4972929..94f741a03e6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -66,6 +66,8 @@ import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.SignalContainerCommand; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; @@ -517,4 +519,22 @@ public void killApplication(ApplicationId appId, String diagnostics) throws YarnException, IOException { client.killApplication(appId, diagnostics); } + + @Override + public Map getResourceProfiles() + throws YarnException, IOException { + return client.getResourceProfiles(); + } + + @Override + public Resource getResourceProfile(String profile) + throws YarnException, IOException { + return client.getResourceProfile(profile); + } + + @Override + public List getResourceTypeInfo() + throws YarnException, IOException { + return client.getResourceTypeInfo(); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java 
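The ResourceMgrDelegate methods added above simply forward to the wrapped YarnClient, so the same information can be queried directly from a client. A hedged sketch follows; only the forwarded method names come from this patch, while the surrounding setup is generic YarnClient usage:

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ResourceProfileQuerySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new YarnConfiguration();
    YarnClient client = YarnClient.createYarnClient();
    client.init(conf);
    client.start();
    try {
      // The three calls forwarded by ResourceMgrDelegate in the change above.
      Map<String, Resource> profiles = client.getResourceProfiles();
      System.out.println("Known profiles: " + profiles.keySet());
      System.out.println("Resource types: " + client.getResourceTypeInfo());
    } finally {
      client.stop();
    }
  }
}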
index 65eac654845..a9b4626581b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -72,6 +72,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -104,6 +108,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; @@ -494,6 +500,26 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( throws YarnException, IOException { return null; } + + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) + throws YarnException, IOException { + return null; + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + return null; + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) + throws YarnException, IOException { + return null; + } } class HistoryService extends AMService implements HSClientProtocol { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java index 274f405529a..22cb5301d0d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java @@ -95,11 +95,13 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.util.ConverterUtils; import 
org.apache.log4j.Level; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; +import org.junit.Assume; import org.junit.BeforeClass; import org.junit.Test; @@ -402,6 +404,10 @@ private void testSleepJobInternal(Configuration sleepConf, @Test(timeout = 3000000) public void testJobWithChangePriority() throws Exception { + Configuration sleepConf = new Configuration(mrCluster.getConfig()); + // Assumption can be removed when FS priority support is implemented + Assume.assumeFalse(sleepConf.get(YarnConfiguration.RM_SCHEDULER) + .equals(FairScheduler.class.getCanonicalName())); if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) { LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR @@ -409,7 +415,6 @@ public void testJobWithChangePriority() throws Exception { return; } - Configuration sleepConf = new Configuration(mrCluster.getConfig()); // set master address to local to test that local mode applied if framework // equals local sleepConf.set(MRConfig.MASTER_ADDRESS, "local"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/java/org/apache/hadoop/mapred/nativetask/NativeRuntime.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/java/org/apache/hadoop/mapred/nativetask/NativeRuntime.java index a0e88bd6249..cc9adba8069 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/java/org/apache/hadoop/mapred/nativetask/NativeRuntime.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/java/org/apache/hadoop/mapred/nativetask/NativeRuntime.java @@ -103,7 +103,7 @@ public synchronized static long registerLibrary(String libraryName, String clazz } /** - * destroy native object We use to destory native handlers + * destroy native object We use to destroy native handlers */ public synchronized static void releaseNativeObject(long addr) { assertNativeLibraryLoaded(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml index 77bfe0a2319..7fc68f0b6eb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml @@ -35,6 +35,13 @@ org.apache.hadoop hadoop-yarn-server-common + + + + com.microsoft.sqlserver + mssql-jdbc + + org.apache.hadoop diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/QuasiMonteCarlo.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/QuasiMonteCarlo.java index 1a0c3726c9a..3048fd3ba2d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/QuasiMonteCarlo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/QuasiMonteCarlo.java @@ -155,7 +155,7 @@ public static class QmcMapper extends /** Map method. * @param offset samples starting from the (offset+1)th sample. 
* @param size the number of samples for this map - * @param context output {ture->numInside, false->numOutside} + * @param context output {true->numInside, false->numOutside} */ public void map(LongWritable offset, LongWritable size, diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 4ba54ad5885..cfdf0840000 100755 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -76,7 +76,7 @@ 1.7.25 - 1.0 + 1.1 @@ -90,7 +90,7 @@ 11.0.2 4.0 - 2.9.4 + 2.9.9 2.0.0-M21 @@ -584,7 +584,7 @@ org.apache.httpcomponents httpcore - 4.4.4 + 4.4.6 commons-codec @@ -856,11 +856,6 @@ - - net.java.dev.jets3t - jets3t - 0.9.0 - com.amazonaws aws-java-sdk-bundle @@ -869,12 +864,12 @@ org.apache.mina mina-core - 2.0.0-M5 + 2.0.16 org.apache.sshd sshd-core - 0.14.0 + 1.6.0 org.apache.ftpserver @@ -914,7 +909,7 @@ org.apache.commons commons-configuration2 - 2.1 + 2.1.1 org.apache.commons @@ -1071,7 +1066,7 @@ com.googlecode.json-simple json-simple - 1.1 + 1.1.1 @@ -1245,7 +1240,7 @@ com.nimbusds nimbus-jose-jwt - 3.9 + 4.41.1 compile @@ -1329,7 +1324,7 @@ org.apache.kerby kerb-simplekdc - 1.0.0 + 1.0.1 org.apache.geronimo.specs diff --git a/hadoop-project/src/site/markdown/index.md.vm b/hadoop-project/src/site/markdown/index.md.vm index bb7bda2c822..d9443d63dc6 100644 --- a/hadoop-project/src/site/markdown/index.md.vm +++ b/hadoop-project/src/site/markdown/index.md.vm @@ -204,6 +204,16 @@ in both the task configuration and as a Java option. Existing configs that already specify both are not affected by this change. See the full release notes of MAPREDUCE-5785 for more details. +S3Guard: Consistency and Metadata Caching for the S3A filesystem client +--------------------- + +[HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345) adds an +optional feature to the S3A client of Amazon S3 storage: the ability to use +a DynamoDB table as a fast and consistent store of file and directory +metadata. + +See [S3Guard](./hadoop-aws/tools/hadoop-aws/s3guard.html) for more details. + Getting Started =============== diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index eda8574c7a0..205fe63e3d8 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -146,6 +146,7 @@ +
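The S3Guard summary above does not show how a deployment actually turns the feature on. As a hedged illustration, the property and class names below are taken from the S3Guard guide referenced in the new index.md.vm text rather than from this patch, so they should be checked against that document:

import org.apache.hadoop.conf.Configuration;

public final class S3GuardConfigSketch {
  private S3GuardConfigSketch() {
  }

  // Assumed names from the S3Guard documentation, not from this diff:
  // the metadata store binding and the DynamoDB-backed implementation.
  public static Configuration enableS3Guard(Configuration conf) {
    conf.set("fs.s3a.metadatastore.impl",
        "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore");
    // Optionally pin the DynamoDB region (key name also from the guide):
    // conf.set("fs.s3a.s3guard.ddb.region", "us-west-2");
    return conf;
  }
}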

diff --git a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml index 82ec16eec11..26155666544 100644 --- a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml +++ b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml @@ -22,10 +22,6 @@ - - - - diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 6bab9a708ec..47788cd5055 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -193,7 +193,6 @@ **/ITestJets3tNativeS3FileSystemContract.java **/ITestS3AContractRootDir.java - **/ITestS3NContractRootDir.java **/ITestS3AFileContextStatistics.java **/ITestS3AEncryptionSSEC*.java **/ITestS3AHuge*.java @@ -226,7 +225,6 @@ **/ITestJets3tNativeS3FileSystemContract.java **/ITestS3AContractRootDir.java - **/ITestS3NContractRootDir.java **/ITestS3AFileContextStatistics.java **/ITestS3AHuge*.java **/ITestS3AEncryptionSSEC*.java @@ -428,11 +426,6 @@ test test-jar - - net.java.dev.jets3t - jets3t - compile - com.amazonaws aws-java-sdk-bundle diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 1a464d0ce6f..d278bdf2caf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -136,7 +136,10 @@ private Constants() { public static final String BUFFER_DIR = "fs.s3a.buffer.dir"; // switch to the fast block-by-block upload mechanism + // this is the only supported upload mechanism + @Deprecated public static final String FAST_UPLOAD = "fs.s3a.fast.upload"; + @Deprecated public static final boolean DEFAULT_FAST_UPLOAD = false; //initial size of memory buffer for a fast upload @@ -451,4 +454,13 @@ private Constants() { public static final String FAIL_INJECT_INCONSISTENCY_PROBABILITY = "fs.s3a.failinject.inconsistency.probability"; + /** + * S3 API level parameters. + */ + @InterfaceStability.Unstable + public static final String LIST_VERSION = "fs.s3a.list.version"; + + @InterfaceStability.Unstable + public static final int DEFAULT_LIST_VERSION = 2; + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java index 5e9cb3fa521..6476f5d2e0d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentAmazonS3Client.java @@ -28,6 +28,8 @@ import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.DeleteObjectsResult; import com.amazonaws.services.s3.model.ListObjectsRequest; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.model.PutObjectResult; @@ -109,8 +111,10 @@ public S3ObjectSummary summary() { } } - /** Map of key to delay -> time it was deleted + object summary (object - * summary is null for prefixes. */ + /** + * Map of key to delay -> time it was deleted + object summary (object summary + * is null for prefixes. 
+ */ private Map delayedDeletes = new HashMap<>(); /** Map of key to delay -> time it was created. */ @@ -196,17 +200,29 @@ public PutObjectResult putObject(PutObjectRequest putObjectRequest) return super.putObject(putObjectRequest); } - /* We should only need to override this version of listObjects() */ + /* We should only need to override these versions of listObjects() */ @Override public ObjectListing listObjects(ListObjectsRequest listObjectsRequest) throws AmazonClientException, AmazonServiceException { LOG.debug("prefix {}", listObjectsRequest.getPrefix()); ObjectListing listing = super.listObjects(listObjectsRequest); - listing = filterListObjects(listObjectsRequest, listing); + listing = filterListObjects(listing); listing = restoreListObjects(listObjectsRequest, listing); return listing; } + /* We should only need to override these versions of listObjects() */ + @Override + public ListObjectsV2Result listObjectsV2(ListObjectsV2Request request) + throws AmazonClientException, AmazonServiceException { + LOG.debug("prefix {}", request.getPrefix()); + ListObjectsV2Result listing = super.listObjectsV2(request); + listing = filterListObjectsV2(listing); + listing = restoreListObjectsV2(request, listing); + return listing; + } + + private void addSummaryIfNotPresent(List list, S3ObjectSummary item) { // Behavior of S3ObjectSummary @@ -282,21 +298,58 @@ private ObjectListing restoreListObjects(ListObjectsRequest request, // recursive list has no delimiter, returns everything that matches a // prefix. boolean recursiveObjectList = !("/".equals(request.getDelimiter())); + String prefix = request.getPrefix(); + + restoreDeleted(outputList, outputPrefixes, recursiveObjectList, prefix); + return new CustomObjectListing(rawListing, outputList, outputPrefixes); + } + + /** + * V2 list API variant of + * {@link #restoreListObjects(ListObjectsRequest, ObjectListing)}. + * @param request original v2 list request + * @param result raw s3 result + */ + private ListObjectsV2Result restoreListObjectsV2(ListObjectsV2Request request, + ListObjectsV2Result result) { + List outputList = result.getObjectSummaries(); + List outputPrefixes = result.getCommonPrefixes(); + // recursive list has no delimiter, returns everything that matches a + // prefix. + boolean recursiveObjectList = !("/".equals(request.getDelimiter())); + String prefix = request.getPrefix(); + + restoreDeleted(outputList, outputPrefixes, recursiveObjectList, prefix); + return new CustomListObjectsV2Result(result, outputList, outputPrefixes); + } + + + /** + * Main logic for + * {@link #restoreListObjects(ListObjectsRequest, ObjectListing)} and + * the v2 variant above. + * @param summaries object summary list to modify. 
+ * @param prefixes prefix list to modify + * @param recursive true if recursive list request + * @param prefix prefix for original list request + */ + private void restoreDeleted(List summaries, + List prefixes, boolean recursive, String prefix) { // Go through all deleted keys for (String key : new HashSet<>(delayedDeletes.keySet())) { Delete delete = delayedDeletes.get(key); if (isKeyDelayed(delete.time(), key)) { - if (isDescendant(request.getPrefix(), key, recursiveObjectList)) { + if (isDescendant(prefix, key, recursive)) { if (delete.summary() != null) { - addSummaryIfNotPresent(outputList, delete.summary()); + addSummaryIfNotPresent(summaries, delete.summary()); } } // Non-recursive list has delimiter: will return rolled-up prefixes for // all keys that are not direct children - if (!recursiveObjectList) { - if (isDescendant(request.getPrefix(), key, true)) { - addPrefixIfNotPresent(outputPrefixes, request.getPrefix(), key); + if (!recursive) { + if (isDescendant(prefix, key, true)) { + addPrefixIfNotPresent(prefixes, prefix, key); } } } else { @@ -304,31 +357,52 @@ private ObjectListing restoreListObjects(ListObjectsRequest request, delayedDeletes.remove(key); } } + } + + private ObjectListing filterListObjects(ObjectListing rawListing) { + + // Filter object listing + List outputList = filterSummaries( + rawListing.getObjectSummaries()); + + // Filter prefixes (directories) + List outputPrefixes = filterPrefixes( + rawListing.getCommonPrefixes()); return new CustomObjectListing(rawListing, outputList, outputPrefixes); } - private ObjectListing filterListObjects(ListObjectsRequest request, - ObjectListing rawListing) { - + private ListObjectsV2Result filterListObjectsV2(ListObjectsV2Result raw) { // Filter object listing + List outputList = filterSummaries( + raw.getObjectSummaries()); + + // Filter prefixes (directories) + List outputPrefixes = filterPrefixes(raw.getCommonPrefixes()); + + return new CustomListObjectsV2Result(raw, outputList, outputPrefixes); + } + + private List filterSummaries( + List summaries) { List outputList = new ArrayList<>(); - for (S3ObjectSummary s : rawListing.getObjectSummaries()) { + for (S3ObjectSummary s : summaries) { String key = s.getKey(); if (!isKeyDelayed(delayedPutKeys.get(key), key)) { outputList.add(s); } } + return outputList; + } - // Filter prefixes (directories) + private List filterPrefixes(List prefixes) { List outputPrefixes = new ArrayList<>(); - for (String key : rawListing.getCommonPrefixes()) { + for (String key : prefixes) { if (!isKeyDelayed(delayedPutKeys.get(key), key)) { outputPrefixes.add(key); } } - - return new CustomObjectListing(rawListing, outputList, outputPrefixes); + return outputPrefixes; } private boolean isKeyDelayed(Long enqueueTime, String key) { @@ -342,7 +416,7 @@ private boolean isKeyDelayed(Long enqueueTime, String key) { delayedDeletes.remove(key); LOG.debug("no longer delaying {}", key); return false; - } else { + } else { LOG.info("delaying {}", key); return true; } @@ -431,4 +505,37 @@ public List getCommonPrefixes() { return customPrefixes; } } + + private static class CustomListObjectsV2Result extends ListObjectsV2Result { + + private final List customListing; + private final List customPrefixes; + + CustomListObjectsV2Result(ListObjectsV2Result raw, + List customListing, List customPrefixes) { + super(); + this.customListing = customListing; + this.customPrefixes = customPrefixes; + + this.setBucketName(raw.getBucketName()); + this.setCommonPrefixes(raw.getCommonPrefixes()); + 
this.setDelimiter(raw.getDelimiter()); + this.setEncodingType(raw.getEncodingType()); + this.setStartAfter(raw.getStartAfter()); + this.setMaxKeys(raw.getMaxKeys()); + this.setContinuationToken(raw.getContinuationToken()); + this.setPrefix(raw.getPrefix()); + this.setTruncated(raw.isTruncated()); + } + + @Override + public List getObjectSummaries() { + return customListing; + } + + @Override + public List getCommonPrefixes() { + return customPrefixes; + } + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index 8efa2181543..d9f059b2a2e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -19,8 +19,6 @@ package org.apache.hadoop.fs.s3a; import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.fs.FileStatus; @@ -90,7 +88,7 @@ ProvidedFileStatusIterator createProvidedFileStatusIterator( */ FileStatusListingIterator createFileStatusListingIterator( Path listPath, - ListObjectsRequest request, + S3ListRequest request, PathFilter filter, Listing.FileStatusAcceptor acceptor) throws IOException { return createFileStatusListingIterator(listPath, request, filter, acceptor, @@ -112,7 +110,7 @@ FileStatusListingIterator createFileStatusListingIterator( */ FileStatusListingIterator createFileStatusListingIterator( Path listPath, - ListObjectsRequest request, + S3ListRequest request, PathFilter filter, Listing.FileStatusAcceptor acceptor, RemoteIterator providedStatus) throws IOException { @@ -432,7 +430,7 @@ private boolean requestNextBatch() throws IOException { * @param objects the next object listing * @return true if this added any entries after filtering */ - private boolean buildNextStatusBatch(ObjectListing objects) { + private boolean buildNextStatusBatch(S3ListResult objects) { // counters for debug logs int added = 0, ignored = 0; // list to fill in with results. Initial size will be list maximum. @@ -512,13 +510,16 @@ public int getBatchSize() { * * Thread safety: none. */ - class ObjectListingIterator implements RemoteIterator { + class ObjectListingIterator implements RemoteIterator { /** The path listed. */ private final Path listPath; /** The most recent listing results. */ - private ObjectListing objects; + private S3ListResult objects; + + /** The most recent listing request. */ + private S3ListRequest request; /** Indicator that this is the first listing. */ private boolean firstListing = true; @@ -542,10 +543,11 @@ class ObjectListingIterator implements RemoteIterator { * */ ObjectListingIterator( Path listPath, - ListObjectsRequest request) { + S3ListRequest request) { this.listPath = listPath; this.maxKeys = owner.getMaxKeys(); this.objects = owner.listObjects(request); + this.request = request; } /** @@ -569,7 +571,7 @@ public boolean hasNext() throws IOException { * @throws NoSuchElementException if there is no more data to list. */ @Override - public ObjectListing next() throws IOException { + public S3ListResult next() throws IOException { if (firstListing) { // on the first listing, don't request more data. 
// Instead just clear the firstListing flag so that it future calls @@ -585,7 +587,7 @@ public ObjectListing next() throws IOException { // need to request a new set of objects. LOG.debug("[{}], Requesting next {} objects under {}", listingCount, maxKeys, listPath); - objects = owner.continueListObjects(objects); + objects = owner.continueListObjects(request, objects); listingCount++; LOG.debug("New listing status: {}", this); } catch (AmazonClientException e) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index c22383a3f02..f4709a7e117 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -53,8 +53,8 @@ import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import com.amazonaws.services.s3.model.ListObjectsRequest; +import com.amazonaws.services.s3.model.ListObjectsV2Request; import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PartETag; import com.amazonaws.services.s3.model.PutObjectRequest; @@ -163,10 +163,10 @@ public class S3AFileSystem extends FileSystem { // The maximum number of entries that can be deleted in any call to s3 private static final int MAX_ENTRIES_TO_DELETE = 1000; - private boolean blockUploadEnabled; private String blockOutputBuffer; private S3ADataBlocks.BlockFactory blockFactory; private int blockOutputActiveBlocks; + private boolean useListV1; /** Add any deprecated keys. 
*/ @SuppressWarnings("deprecation") @@ -261,6 +261,13 @@ public StorageStatistics provide() { BlockingThreadPoolExecutorService.newDaemonThreadFactory( "s3a-transfer-unbounded")); + int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION); + if (listVersion < 1 || listVersion > 2) { + LOG.warn("Configured fs.s3a.list.version {} is invalid, forcing " + + "version 2", listVersion); + } + useListV1 = (listVersion == 1); + initTransferManager(); initCannedAcls(conf); @@ -273,21 +280,20 @@ public StorageStatistics provide() { inputPolicy = S3AInputPolicy.getPolicy( conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL)); - blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, DEFAULT_FAST_UPLOAD); + boolean blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, true); - if (blockUploadEnabled) { - blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, - DEFAULT_FAST_UPLOAD_BUFFER); - partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize); - blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer); - blockOutputActiveBlocks = intOption(conf, - FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1); - LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + - " queue limit={}", - blockOutputBuffer, partSize, blockOutputActiveBlocks); - } else { - LOG.debug("Using S3AOutputStream"); + if (!blockUploadEnabled) { + LOG.warn("The \"slow\" output stream is no longer supported"); } + blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, + DEFAULT_FAST_UPLOAD_BUFFER); + partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize); + blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer); + blockOutputActiveBlocks = intOption(conf, + FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1); + LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + + " queue limit={}", + blockOutputBuffer, partSize, blockOutputActiveBlocks); metadataStore = S3Guard.getMetadataStore(this); allowAuthoritative = conf.getBoolean(METADATASTORE_AUTHORITATIVE, @@ -636,33 +642,18 @@ public FSDataOutputStream create(Path f, FsPermission permission, } instrumentation.fileCreated(); - FSDataOutputStream output; - if (blockUploadEnabled) { - output = new FSDataOutputStream( - new S3ABlockOutputStream(this, - key, - new SemaphoredDelegatingExecutor(boundedThreadPool, - blockOutputActiveBlocks, true), - progress, - partSize, - blockFactory, - instrumentation.newOutputStreamStatistics(statistics), - new WriteOperationHelper(key) - ), - null); - } else { - - // We pass null to FSDataOutputStream so it won't count writes that - // are being buffered to a file - output = new FSDataOutputStream( - new S3AOutputStream(getConf(), - this, - key, - progress - ), - null); - } - return output; + return new FSDataOutputStream( + new S3ABlockOutputStream(this, + key, + new SemaphoredDelegatingExecutor(boundedThreadPool, + blockOutputActiveBlocks, true), + progress, + partSize, + blockFactory, + instrumentation.newOutputStreamStatistics(statistics), + new WriteOperationHelper(key) + ), + null); } /** @@ -1056,21 +1047,37 @@ protected ObjectMetadata getObjectMetadata(String key) { * @param request request to initiate * @return the results */ - protected ObjectListing listObjects(ListObjectsRequest request) { + protected S3ListResult listObjects(S3ListRequest request) { incrementStatistic(OBJECT_LIST_REQUESTS); incrementReadOperations(); - return s3.listObjects(request); + if (useListV1) { + Preconditions.checkArgument(request.isV1()); + return 
S3ListResult.v1(s3.listObjects(request.getV1())); + } else { + Preconditions.checkArgument(!request.isV1()); + return S3ListResult.v2(s3.listObjectsV2(request.getV2())); + } } /** * List the next set of objects. - * @param objects paged result + * @param request last list objects request to continue + * @param prevResult last paged result to continue from * @return the next result object */ - protected ObjectListing continueListObjects(ObjectListing objects) { + protected S3ListResult continueListObjects(S3ListRequest request, + S3ListResult prevResult) { incrementStatistic(OBJECT_CONTINUE_LIST_REQUESTS); incrementReadOperations(); - return s3.listNextBatchOfObjects(objects); + if (useListV1) { + Preconditions.checkArgument(request.isV1()); + return S3ListResult.v1(s3.listNextBatchOfObjects(prevResult.getV1())); + } else { + Preconditions.checkArgument(!request.isV1()); + request.getV2().setContinuationToken(prevResult.getV2() + .getNextContinuationToken()); + return S3ListResult.v2(s3.listObjectsV2(request.getV2())); + } } /** @@ -1464,9 +1471,9 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive) } else { LOG.debug("Getting objects for directory prefix {} to delete", key); - ListObjectsRequest request = createListObjectsRequest(key, null); + S3ListRequest request = createListObjectsRequest(key, null); - ObjectListing objects = listObjects(request); + S3ListResult objects = listObjects(request); List keys = new ArrayList<>(objects.getObjectSummaries().size()); while (true) { @@ -1481,7 +1488,7 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive) } if (objects.isTruncated()) { - objects = continueListObjects(objects); + objects = continueListObjects(request, objects); } else { if (!keys.isEmpty()) { // TODO: HADOOP-13761 S3Guard: retries @@ -1589,7 +1596,7 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException, return S3Guard.dirMetaToStatuses(dirMeta); } - ListObjectsRequest request = createListObjectsRequest(key, "/"); + S3ListRequest request = createListObjectsRequest(key, "/"); LOG.debug("listStatus: doing listObjects for directory {}", key); Listing.FileStatusListingIterator files = @@ -1619,16 +1626,38 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException, * @return the request */ @VisibleForTesting - ListObjectsRequest createListObjectsRequest(String key, + S3ListRequest createListObjectsRequest(String key, String delimiter) { - ListObjectsRequest request = new ListObjectsRequest(); - request.setBucketName(bucket); - request.setMaxKeys(maxKeys); - request.setPrefix(key); - if (delimiter != null) { - request.setDelimiter(delimiter); + return createListObjectsRequest(key, delimiter, null); + } + + private S3ListRequest createListObjectsRequest(String key, + String delimiter, Integer overrideMaxKeys) { + if (!useListV1) { + ListObjectsV2Request request = + new ListObjectsV2Request().withBucketName(bucket) + .withMaxKeys(maxKeys) + .withPrefix(key); + if (delimiter != null) { + request.setDelimiter(delimiter); + } + if (overrideMaxKeys != null) { + request.setMaxKeys(overrideMaxKeys); + } + return S3ListRequest.v2(request); + } else { + ListObjectsRequest request = new ListObjectsRequest(); + request.setBucketName(bucket); + request.setMaxKeys(maxKeys); + request.setPrefix(key); + if (delimiter != null) { + request.setDelimiter(delimiter); + } + if (overrideMaxKeys != null) { + request.setMaxKeys(overrideMaxKeys); + } + return S3ListRequest.v1(request); } - return request; } /** @@ -1885,13 +1914,9 @@ 
private S3AFileStatus s3GetFileStatus(final Path path, String key, try { key = maybeAddTrailingSlash(key); - ListObjectsRequest request = new ListObjectsRequest(); - request.setBucketName(bucket); - request.setPrefix(key); - request.setDelimiter("/"); - request.setMaxKeys(1); + S3ListRequest request = createListObjectsRequest(key, "/", 1); - ObjectListing objects = listObjects(request); + S3ListResult objects = listObjects(request); Collection prefixes = objects.getCommonPrefixes(); Collection summaries = objects.getObjectSummaries(); @@ -2429,7 +2454,9 @@ public String toString() { sb.append(", cannedACL=").append(cannedACL.toString()); } sb.append(", readAhead=").append(readAhead); - sb.append(", blockSize=").append(getDefaultBlockSize()); + if (getConf() != null) { + sb.append(", blockSize=").append(getDefaultBlockSize()); + } sb.append(", multiPartThreshold=").append(multiPartThreshold); if (serverSideEncryptionAlgorithm != null) { sb.append(", serverSideEncryptionAlgorithm='") @@ -2441,6 +2468,7 @@ public String toString() { } sb.append(", metastore=").append(metadataStore); sb.append(", authoritative=").append(allowAuthoritative); + sb.append(", useListV1=").append(useListV1); sb.append(", boundedExecutor=").append(boundedThreadPool); sb.append(", unboundedExecutor=").append(unboundedThreadPool); sb.append(", statistics {") diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AOutputStream.java deleted file mode 100644 index e723b756858..00000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AOutputStream.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.ObjectMetadata; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.LocalDirAllocator; -import org.apache.hadoop.util.Progressable; - -import org.slf4j.Logger; - -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.io.OutputStream; -import java.util.concurrent.atomic.AtomicBoolean; - -import static org.apache.hadoop.fs.s3a.S3AUtils.*; - -/** - * Output stream to save data to S3. 
- */ -@InterfaceAudience.Private -@InterfaceStability.Evolving -public class S3AOutputStream extends OutputStream { - private final OutputStream backupStream; - private final File backupFile; - private final AtomicBoolean closed = new AtomicBoolean(false); - private final String key; - private final Progressable progress; - private final S3AFileSystem fs; - - public static final Logger LOG = S3AFileSystem.LOG; - - public S3AOutputStream(Configuration conf, - S3AFileSystem fs, - String key, - Progressable progress) - throws IOException { - this.key = key; - this.progress = progress; - this.fs = fs; - - - backupFile = fs.createTmpFileForWrite("output-", - LocalDirAllocator.SIZE_UNKNOWN, conf); - - LOG.debug("OutputStream for key '{}' writing to tempfile: {}", - key, backupFile); - - this.backupStream = new BufferedOutputStream( - new FileOutputStream(backupFile)); - } - - /** - * Check for the filesystem being open. - * @throws IOException if the filesystem is closed. - */ - void checkOpen() throws IOException { - if (closed.get()) { - throw new IOException("Output Stream closed"); - } - } - - @Override - public void flush() throws IOException { - checkOpen(); - backupStream.flush(); - } - - @Override - public void close() throws IOException { - if (closed.getAndSet(true)) { - return; - } - - backupStream.close(); - LOG.debug("OutputStream for key '{}' closed. Now beginning upload", key); - - try { - final ObjectMetadata om = fs.newObjectMetadata(backupFile.length()); - UploadInfo info = fs.putObject( - fs.newPutObjectRequest( - key, - om, - backupFile)); - ProgressableProgressListener listener = - new ProgressableProgressListener(fs, key, info.getUpload(), progress); - info.getUpload().addProgressListener(listener); - - info.getUpload().waitForUploadResult(); - listener.uploadCompleted(); - // This will delete unnecessary fake parent directories, update any - // MetadataStore - fs.finishedWrite(key, info.getLength()); - } catch (InterruptedException e) { - throw (InterruptedIOException) new InterruptedIOException(e.toString()) - .initCause(e); - } catch (AmazonClientException e) { - throw translateException("saving output", key , e); - } finally { - if (!backupFile.delete()) { - LOG.warn("Could not delete temporary s3a file: {}", backupFile); - } - super.close(); - } - LOG.debug("OutputStream for key '{}' upload complete", key); - } - - @Override - public void write(int b) throws IOException { - checkOpen(); - backupStream.write(b); - } - - @Override - public void write(byte[] b, int off, int len) throws IOException { - checkOpen(); - backupStream.write(b, off, len); - } - -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java new file mode 100644 index 00000000000..6b3bd46cfb9 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import com.amazonaws.services.s3.model.ListObjectsRequest; +import com.amazonaws.services.s3.model.ListObjectsV2Request; + +/** + * API version-independent container for S3 List requests. + */ +public class S3ListRequest { + private ListObjectsRequest v1Request; + private ListObjectsV2Request v2Request; + + protected S3ListRequest(ListObjectsRequest v1, ListObjectsV2Request v2) { + v1Request = v1; + v2Request = v2; + } + + /** + * Restricted constructors to ensure v1 or v2, not both. + * @param request v1 request + * @return new list request container + */ + public static S3ListRequest v1(ListObjectsRequest request) { + return new S3ListRequest(request, null); + } + + /** + * Restricted constructors to ensure v1 or v2, not both. + * @param request v2 request + * @return new list request container + */ + public static S3ListRequest v2(ListObjectsV2Request request) { + return new S3ListRequest(null, request); + } + + /** + * Is this a v1 API request or v2? + * @return true if v1, false if v2 + */ + public boolean isV1() { + return v1Request != null; + } + + public ListObjectsRequest getV1() { + return v1Request; + } + + public ListObjectsV2Request getV2() { + return v2Request; + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java new file mode 100644 index 00000000000..e8aff329070 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.ObjectListing; +import com.amazonaws.services.s3.model.S3ObjectSummary; + +import java.util.List; + +/** + * API version-independent container for S3 List responses. + */ +public class S3ListResult { + private ObjectListing v1Result; + private ListObjectsV2Result v2Result; + + protected S3ListResult(ObjectListing v1, ListObjectsV2Result v2) { + v1Result = v1; + v2Result = v2; + } + + /** + * Restricted constructors to ensure v1 or v2, not both. 
+   * @param result v1 result
+   * @return new list result container
+   */
+  public static S3ListResult v1(ObjectListing result) {
+    return new S3ListResult(result, null);
+  }
+
+  /**
+   * Restricted constructors to ensure v1 or v2, not both.
+   * @param result v2 result
+   * @return new list result container
+   */
+  public static S3ListResult v2(ListObjectsV2Result result) {
+    return new S3ListResult(null, result);
+  }
+
+  /**
+   * Is this a v1 API result or v2?
+   * @return true if v1, false if v2
+   */
+  public boolean isV1() {
+    return v1Result != null;
+  }
+
+  public ObjectListing getV1() {
+    return v1Result;
+  }
+
+  public ListObjectsV2Result getV2() {
+    return v2Result;
+  }
+
+  public List<S3ObjectSummary> getObjectSummaries() {
+    if (isV1()) {
+      return v1Result.getObjectSummaries();
+    } else {
+      return v2Result.getObjectSummaries();
+    }
+  }
+
+  public boolean isTruncated() {
+    if (isV1()) {
+      return v1Result.isTruncated();
+    } else {
+      return v2Result.isTruncated();
+    }
+  }
+
+  public List<String> getCommonPrefixes() {
+    if (isV1()) {
+      return v1Result.getCommonPrefixes();
+    } else {
+      return v2Result.getCommonPrefixes();
+    }
+  }
+}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java
deleted file mode 100644
index c9c0f98ec33..00000000000
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java
+++ /dev/null
@@ -1,481 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package org.apache.hadoop.fs.s3native; - -import static org.apache.hadoop.fs.s3native.NativeS3FileSystem.PATH_DELIMITER; - -import java.io.BufferedInputStream; -import java.io.ByteArrayInputStream; -import java.io.EOFException; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.security.AccessControlException; -import org.jets3t.service.S3Service; -import org.jets3t.service.S3ServiceException; -import org.jets3t.service.ServiceException; -import org.jets3t.service.StorageObjectsChunk; -import org.jets3t.service.impl.rest.HttpException; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.MultipartPart; -import org.jets3t.service.model.MultipartUpload; -import org.jets3t.service.model.S3Bucket; -import org.jets3t.service.model.S3Object; -import org.jets3t.service.model.StorageObject; -import org.jets3t.service.security.AWSCredentials; -import org.jets3t.service.utils.MultipartUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@InterfaceAudience.Private -@InterfaceStability.Unstable -class Jets3tNativeFileSystemStore implements NativeFileSystemStore { - - private S3Service s3Service; - private S3Bucket bucket; - - private long multipartBlockSize; - private boolean multipartEnabled; - private long multipartCopyBlockSize; - static final long MAX_PART_SIZE = (long)5 * 1024 * 1024 * 1024; - - private String serverSideEncryptionAlgorithm; - - public static final Logger LOG = - LoggerFactory.getLogger(Jets3tNativeFileSystemStore.class); - - @Override - public void initialize(URI uri, Configuration conf) throws IOException { - S3Credentials s3Credentials = new S3Credentials(); - s3Credentials.initialize(uri, conf); - try { - AWSCredentials awsCredentials = - new AWSCredentials(s3Credentials.getAccessKey(), - s3Credentials.getSecretAccessKey()); - this.s3Service = new RestS3Service(awsCredentials); - } catch (S3ServiceException e) { - handleException(e); - } - multipartEnabled = - conf.getBoolean("fs.s3n.multipart.uploads.enabled", false); - multipartBlockSize = Math.min( - conf.getLong("fs.s3n.multipart.uploads.block.size", 64 * 1024 * 1024), - MAX_PART_SIZE); - multipartCopyBlockSize = Math.min( - conf.getLong("fs.s3n.multipart.copy.block.size", MAX_PART_SIZE), - MAX_PART_SIZE); - serverSideEncryptionAlgorithm = conf.get("fs.s3n.server-side-encryption-algorithm"); - - bucket = new S3Bucket(uri.getHost()); - } - - @Override - public void storeFile(String key, File file, byte[] md5Hash) - throws IOException { - - if (multipartEnabled && file.length() >= multipartBlockSize) { - storeLargeFile(key, file, md5Hash); - return; - } - - BufferedInputStream in = null; - try { - in = new BufferedInputStream(new FileInputStream(file)); - S3Object object = new S3Object(key); - object.setDataInputStream(in); - object.setContentType("binary/octet-stream"); - object.setContentLength(file.length()); - object.setServerSideEncryptionAlgorithm(serverSideEncryptionAlgorithm); - if (md5Hash != null) { - object.setMd5Hash(md5Hash); - } - s3Service.putObject(bucket, 
object); - } catch (ServiceException e) { - handleException(e, key); - } finally { - IOUtils.closeStream(in); - } - } - - public void storeLargeFile(String key, File file, byte[] md5Hash) - throws IOException { - S3Object object = new S3Object(key); - object.setDataInputFile(file); - object.setContentType("binary/octet-stream"); - object.setContentLength(file.length()); - object.setServerSideEncryptionAlgorithm(serverSideEncryptionAlgorithm); - if (md5Hash != null) { - object.setMd5Hash(md5Hash); - } - - List objectsToUploadAsMultipart = - new ArrayList(); - objectsToUploadAsMultipart.add(object); - MultipartUtils mpUtils = new MultipartUtils(multipartBlockSize); - - try { - mpUtils.uploadObjects(bucket.getName(), s3Service, - objectsToUploadAsMultipart, null); - } catch (Exception e) { - handleException(e, key); - } - } - - @Override - public void storeEmptyFile(String key) throws IOException { - try { - S3Object object = new S3Object(key); - object.setDataInputStream(new ByteArrayInputStream(new byte[0])); - object.setContentType("binary/octet-stream"); - object.setContentLength(0); - object.setServerSideEncryptionAlgorithm(serverSideEncryptionAlgorithm); - s3Service.putObject(bucket, object); - } catch (ServiceException e) { - handleException(e, key); - } - } - - @Override - public FileMetadata retrieveMetadata(String key) throws IOException { - StorageObject object = null; - try { - LOG.debug("Getting metadata for key: {} from bucket: {}", - key, bucket.getName()); - object = s3Service.getObjectDetails(bucket.getName(), key); - return new FileMetadata(key, object.getContentLength(), - object.getLastModifiedDate().getTime()); - - } catch (ServiceException e) { - try { - // process - handleException(e, key); - return null; - } catch (FileNotFoundException fnfe) { - // and downgrade missing files - return null; - } - } finally { - if (object != null) { - object.closeDataInputStream(); - } - } - } - - /** - * @param key - * The key is the object name that is being retrieved from the S3 bucket - * @return - * This method returns null if the key is not found - * @throws IOException - */ - - @Override - public InputStream retrieve(String key) throws IOException { - try { - LOG.debug("Getting key: {} from bucket: {}", - key, bucket.getName()); - S3Object object = s3Service.getObject(bucket.getName(), key); - return object.getDataInputStream(); - } catch (ServiceException e) { - handleException(e, key); - return null; //return null if key not found - } - } - - /** - * - * @param key - * The key is the object name that is being retrieved from the S3 bucket - * @return - * This method returns null if the key is not found - * @throws IOException - */ - - @Override - public InputStream retrieve(String key, long byteRangeStart) - throws IOException { - try { - LOG.debug("Getting key: {} from bucket: {} with byteRangeStart: {}", - key, bucket.getName(), byteRangeStart); - S3Object object = s3Service.getObject(bucket, key, null, null, null, - null, byteRangeStart, null); - return object.getDataInputStream(); - } catch (ServiceException e) { - handleException(e, key); - return null; - } - } - - @Override - public PartialListing list(String prefix, int maxListingLength) - throws IOException { - return list(prefix, maxListingLength, null, false); - } - - @Override - public PartialListing list(String prefix, int maxListingLength, String priorLastKey, - boolean recurse) throws IOException { - - return list(prefix, recurse ? 
null : PATH_DELIMITER, maxListingLength, priorLastKey); - } - - /** - * list objects - * @param prefix prefix - * @param delimiter delimiter - * @param maxListingLength max no. of entries - * @param priorLastKey last key in any previous search - * @return a list of matches - * @throws IOException on any reported failure - */ - - private PartialListing list(String prefix, String delimiter, - int maxListingLength, String priorLastKey) throws IOException { - try { - if (!prefix.isEmpty() && !prefix.endsWith(PATH_DELIMITER)) { - prefix += PATH_DELIMITER; - } - StorageObjectsChunk chunk = s3Service.listObjectsChunked(bucket.getName(), - prefix, delimiter, maxListingLength, priorLastKey); - - FileMetadata[] fileMetadata = - new FileMetadata[chunk.getObjects().length]; - for (int i = 0; i < fileMetadata.length; i++) { - StorageObject object = chunk.getObjects()[i]; - fileMetadata[i] = new FileMetadata(object.getKey(), - object.getContentLength(), object.getLastModifiedDate().getTime()); - } - return new PartialListing(chunk.getPriorLastKey(), fileMetadata, - chunk.getCommonPrefixes()); - } catch (ServiceException e) { - handleException(e, prefix); - return null; // never returned - keep compiler happy - } - } - - @Override - public void delete(String key) throws IOException { - try { - LOG.debug("Deleting key: {} from bucket: {}", - key, bucket.getName()); - s3Service.deleteObject(bucket, key); - } catch (ServiceException e) { - handleException(e, key); - } - } - - public void rename(String srcKey, String dstKey) throws IOException { - try { - s3Service.renameObject(bucket.getName(), srcKey, new S3Object(dstKey)); - } catch (ServiceException e) { - handleException(e, srcKey); - } - } - - @Override - public void copy(String srcKey, String dstKey) throws IOException { - try { - if(LOG.isDebugEnabled()) { - LOG.debug("Copying srcKey: " + srcKey + "to dstKey: " + dstKey + "in bucket: " + bucket.getName()); - } - if (multipartEnabled) { - S3Object object = s3Service.getObjectDetails(bucket, srcKey, null, - null, null, null); - if (multipartCopyBlockSize > 0 && - object.getContentLength() > multipartCopyBlockSize) { - copyLargeFile(object, dstKey); - return; - } - } - - S3Object dstObject = new S3Object(dstKey); - dstObject.setServerSideEncryptionAlgorithm(serverSideEncryptionAlgorithm); - s3Service.copyObject(bucket.getName(), srcKey, bucket.getName(), - dstObject, false); - } catch (ServiceException e) { - handleException(e, srcKey); - } - } - - public void copyLargeFile(S3Object srcObject, String dstKey) throws IOException { - try { - long partCount = srcObject.getContentLength() / multipartCopyBlockSize + - (srcObject.getContentLength() % multipartCopyBlockSize > 0 ? 
1 : 0); - - MultipartUpload multipartUpload = s3Service.multipartStartUpload - (bucket.getName(), dstKey, srcObject.getMetadataMap()); - - List listedParts = new ArrayList(); - for (int i = 0; i < partCount; i++) { - long byteRangeStart = i * multipartCopyBlockSize; - long byteLength; - if (i < partCount - 1) { - byteLength = multipartCopyBlockSize; - } else { - byteLength = srcObject.getContentLength() % multipartCopyBlockSize; - if (byteLength == 0) { - byteLength = multipartCopyBlockSize; - } - } - - MultipartPart copiedPart = s3Service.multipartUploadPartCopy - (multipartUpload, i + 1, bucket.getName(), srcObject.getKey(), - null, null, null, null, byteRangeStart, - byteRangeStart + byteLength - 1, null); - listedParts.add(copiedPart); - } - - Collections.reverse(listedParts); - s3Service.multipartCompleteUpload(multipartUpload, listedParts); - } catch (ServiceException e) { - handleException(e, srcObject.getKey()); - } - } - - @Override - public void purge(String prefix) throws IOException { - String key = ""; - try { - S3Object[] objects = - s3Service.listObjects(bucket.getName(), prefix, null); - for (S3Object object : objects) { - key = object.getKey(); - s3Service.deleteObject(bucket, key); - } - } catch (S3ServiceException e) { - handleException(e, key); - } - } - - @Override - public void dump() throws IOException { - StringBuilder sb = new StringBuilder("S3 Native Filesystem, "); - sb.append(bucket.getName()).append("\n"); - try { - S3Object[] objects = s3Service.listObjects(bucket.getName()); - for (S3Object object : objects) { - sb.append(object.getKey()).append("\n"); - } - } catch (S3ServiceException e) { - handleException(e); - } - System.out.println(sb); - } - - /** - * Handle any service exception by translating it into an IOException - * @param e exception - * @throws IOException exception -always - */ - private void handleException(Exception e) throws IOException { - throw processException(e, e, ""); - } - /** - * Handle any service exception by translating it into an IOException - * @param e exception - * @param key key sought from object store - - * @throws IOException exception -always - */ - private void handleException(Exception e, String key) throws IOException { - throw processException(e, e, key); - } - - /** - * Handle any service exception by translating it into an IOException - * @param thrown exception - * @param original original exception -thrown if no other translation could - * be made - * @param key key sought from object store or "" for undefined - * @return an exception to throw. If isProcessingCause==true this may be null. - */ - private IOException processException(Throwable thrown, Throwable original, - String key) { - IOException result; - if (thrown.getCause() != null) { - // recurse down - result = processException(thrown.getCause(), original, key); - } else if (thrown instanceof HttpException) { - // nested HttpException - examine error code and react - HttpException httpException = (HttpException) thrown; - String responseMessage = httpException.getResponseMessage(); - int responseCode = httpException.getResponseCode(); - String bucketName = "s3n://" + bucket.getName(); - String text = String.format("%s : %03d : %s", - bucketName, - responseCode, - responseMessage); - String filename = !key.isEmpty() ? 
(bucketName + "/" + key) : text; - IOException ioe; - switch (responseCode) { - case 404: - result = new FileNotFoundException(filename); - break; - case 416: // invalid range - result = new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF - +": " + filename); - break; - case 403: //forbidden - result = new AccessControlException("Permission denied" - +": " + filename); - break; - default: - result = new IOException(text); - } - result.initCause(thrown); - } else if (thrown instanceof S3ServiceException) { - S3ServiceException se = (S3ServiceException) thrown; - LOG.debug( - "S3ServiceException: {}: {} : {}", - se.getS3ErrorCode(), se.getS3ErrorMessage(), se, se); - if ("InvalidRange".equals(se.getS3ErrorCode())) { - result = new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); - } else { - result = new S3Exception(se); - } - } else if (thrown instanceof ServiceException) { - ServiceException se = (ServiceException) thrown; - LOG.debug("S3ServiceException: {}: {} : {}", - se.getErrorCode(), se.toString(), se, se); - result = new S3Exception(se); - } else if (thrown instanceof IOException) { - result = (IOException) thrown; - } else { - // here there is no exception derived yet. - // this means no inner cause, and no translation made yet. - // convert the original to an IOException -rather than just the - // exception at the base of the tree - result = new S3Exception(original); - } - - return result; - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java deleted file mode 100644 index f26cdac937e..00000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; - -/** - *

- * An abstraction for a key-based {@link File} store. - *

- */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -interface NativeFileSystemStore { - - void initialize(URI uri, Configuration conf) throws IOException; - - void storeFile(String key, File file, byte[] md5Hash) throws IOException; - void storeEmptyFile(String key) throws IOException; - - FileMetadata retrieveMetadata(String key) throws IOException; - InputStream retrieve(String key) throws IOException; - InputStream retrieve(String key, long byteRangeStart) throws IOException; - - PartialListing list(String prefix, int maxListingLength) throws IOException; - PartialListing list(String prefix, int maxListingLength, String priorLastKey, boolean recursive) - throws IOException; - - void delete(String key) throws IOException; - - void copy(String srcKey, String dstKey) throws IOException; - - /** - * Delete all keys with the given prefix. Used for testing. - * @throws IOException - */ - void purge(String prefix) throws IOException; - - /** - * Diagnostic method to dump state to the console. - * @throws IOException - */ - void dump() throws IOException; -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java index 1a45db311ea..5a7129f7d5c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,312 +18,48 @@ package org.apache.hadoop.fs.s3native; -import java.io.BufferedOutputStream; -import java.io.EOFException; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.net.URI; -import java.security.DigestOutputStream; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; -import java.util.concurrent.TimeUnit; -import com.google.common.base.Preconditions; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BufferedFSInputStream; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.FileAlreadyExistsException; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalDirAllocator; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.io.retry.RetryPolicies; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryProxy; -import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_BUFFER_DIR_DEFAULT; -import static 
org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_BUFFER_DIR_KEY; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_MAX_RETRIES_DEFAUL; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_MAX_RETRIES_KEY; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_SLEEP_TIME_DEFAULT; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_SLEEP_TIME_KEY; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.addDeprecatedConfigKeys; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; /** - * A {@link FileSystem} for reading and writing files stored on - *
Amazon S3. - * This implementation stores files on S3 in their native form so they can be - * read by other S3 tools. - *

- * A note about directories. S3 of course has no "native" support for them. - * The idiom we choose then is: for any directory created by this class, - * we use an empty object "#{dirpath}_$folder$" as a marker. - * Further, to interoperate with other S3 tools, we also accept the following: - *

    - *
  • an object "#{dirpath}/' denoting a directory marker
  • - *
  • - * if there exists any objects with the prefix "#{dirpath}/", then the - * directory is said to exist - *
  • - *
  • - * if both a file with the name of a directory and a marker for that - * directory exists, then the *file masks the directory*, and the directory - * is never returned. - *
  • - *
+ * This is a stub filesystem purely present to fail meaningfully when + * someone who explicitly declares + * {@code fs.s3n.impl=org.apache.hadoop.fs.s3native.NativeS3FileSystem} + * and then tries to create a filesystem off an s3n:// URL. + * + * The {@link #initialize(URI, Configuration)} method will throw + * an IOException informing the user of their need to migrate. + * @deprecated Replaced by the S3A client. */ @InterfaceAudience.Public @InterfaceStability.Stable -public class NativeS3FileSystem extends FileSystem { +public final class NativeS3FileSystem extends FileSystem { public static final Logger LOG = LoggerFactory.getLogger(NativeS3FileSystem.class); - - private static final String FOLDER_SUFFIX = "_$folder$"; - static final String PATH_DELIMITER = Path.SEPARATOR; - private static final int S3_MAX_LISTING_LENGTH = 1000; - static { - // Add the deprecated config keys - addDeprecatedConfigKeys(); - } + /** + * Message in thrown exceptions: {@value}. + */ + private static final String UNSUPPORTED = + "The s3n:// client to Amazon S3 is no longer available:" + + " please migrate to the s3a:// client"; - static class NativeS3FsInputStream extends FSInputStream { - - private NativeFileSystemStore store; - private Statistics statistics; - private InputStream in; - private final String key; - private long pos = 0; - - public NativeS3FsInputStream(NativeFileSystemStore store, Statistics statistics, InputStream in, String key) { - Preconditions.checkNotNull(in, "Null input stream"); - this.store = store; - this.statistics = statistics; - this.in = in; - this.key = key; - } - - @Override - public synchronized int read() throws IOException { - int result; - try { - result = in.read(); - } catch (IOException e) { - LOG.info("Received IOException while reading '{}', attempting to reopen", - key); - LOG.debug("{}", e, e); - try { - reopen(pos); - result = in.read(); - } catch (EOFException eof) { - LOG.debug("EOF on input stream read: {}", eof, eof); - result = -1; - } - } - if (result != -1) { - pos++; - } - if (statistics != null && result != -1) { - statistics.incrementBytesRead(1); - } - return result; - } - @Override - public synchronized int read(byte[] b, int off, int len) - throws IOException { - if (in == null) { - throw new EOFException("Cannot read closed stream"); - } - int result = -1; - try { - result = in.read(b, off, len); - } catch (EOFException eof) { - throw eof; - } catch (IOException e) { - LOG.info( "Received IOException while reading '{}'," + - " attempting to reopen.", key); - reopen(pos); - result = in.read(b, off, len); - } - if (result > 0) { - pos += result; - } - if (statistics != null && result > 0) { - statistics.incrementBytesRead(result); - } - return result; - } - - @Override - public synchronized void close() throws IOException { - closeInnerStream(); - } - - /** - * Close the inner stream if not null. 
Even if an exception - * is raised during the close, the field is set to null - */ - private void closeInnerStream() { - IOUtils.closeStream(in); - in = null; - } - - /** - * Reopen a new input stream with the specified position - * @param pos the position to reopen a new stream - * @throws IOException - */ - private synchronized void reopen(long pos) throws IOException { - LOG.debug("Reopening key '{}' for reading at position '{}", key, pos); - InputStream newStream = store.retrieve(key, pos); - updateInnerStream(newStream, pos); - } - - /** - * Update inner stream with a new stream and position - * @param newStream new stream -must not be null - * @param newpos new position - * @throws IOException IO exception on a failure to close the existing - * stream. - */ - private synchronized void updateInnerStream(InputStream newStream, long newpos) throws IOException { - Preconditions.checkNotNull(newStream, "Null newstream argument"); - closeInnerStream(); - in = newStream; - this.pos = newpos; - } - - @Override - public synchronized void seek(long newpos) throws IOException { - if (newpos < 0) { - throw new EOFException( - FSExceptionMessages.NEGATIVE_SEEK); - } - if (pos != newpos) { - // the seek is attempting to move the current position - reopen(newpos); - } - } - - @Override - public synchronized long getPos() throws IOException { - return pos; - } - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - return false; - } - } - - private class NativeS3FsOutputStream extends OutputStream { - - private Configuration conf; - private String key; - private File backupFile; - private OutputStream backupStream; - private MessageDigest digest; - private boolean closed; - private LocalDirAllocator lDirAlloc; - - public NativeS3FsOutputStream(Configuration conf, - NativeFileSystemStore store, String key, Progressable progress, - int bufferSize) throws IOException { - this.conf = conf; - this.key = key; - this.backupFile = newBackupFile(); - LOG.info("OutputStream for key '" + key + "' writing to tempfile '" + this.backupFile + "'"); - try { - this.digest = MessageDigest.getInstance("MD5"); - this.backupStream = new BufferedOutputStream(new DigestOutputStream( - new FileOutputStream(backupFile), this.digest)); - } catch (NoSuchAlgorithmException e) { - LOG.warn("Cannot load MD5 digest algorithm," + - "skipping message integrity check.", e); - this.backupStream = new BufferedOutputStream( - new FileOutputStream(backupFile)); - } - } - - private File newBackupFile() throws IOException { - if (conf.get(S3_NATIVE_BUFFER_DIR_KEY, null) != null) { - lDirAlloc = new LocalDirAllocator(S3_NATIVE_BUFFER_DIR_KEY); - } else { - lDirAlloc = new LocalDirAllocator(S3_NATIVE_BUFFER_DIR_DEFAULT); - } - File result = lDirAlloc.createTmpFileForWrite("output-", LocalDirAllocator.SIZE_UNKNOWN, conf); - result.deleteOnExit(); - return result; - } - - @Override - public void flush() throws IOException { - backupStream.flush(); - } - - @Override - public synchronized void close() throws IOException { - if (closed) { - return; - } - - backupStream.close(); - LOG.info("OutputStream for key '{}' closed. Now beginning upload", key); - - try { - byte[] md5Hash = digest == null ? 
null : digest.digest(); - store.storeFile(key, backupFile, md5Hash); - } finally { - if (!backupFile.delete()) { - LOG.warn("Could not delete temporary s3n file: " + backupFile); - } - super.close(); - closed = true; - } - LOG.info("OutputStream for key '{}' upload complete", key); - } - - @Override - public void write(int b) throws IOException { - backupStream.write(b); - } - - @Override - public void write(byte[] b, int off, int len) throws IOException { - backupStream.write(b, off, len); - } - } - - private URI uri; - private NativeFileSystemStore store; - private Path workingDir; - public NativeS3FileSystem() { - // set store in initialize() - } - - public NativeS3FileSystem(NativeFileSystemStore store) { - this.store = store; } /** @@ -336,504 +72,77 @@ public String getScheme() { return "s3n"; } + /** + * Always fail to initialize. + * @throws IOException always. + */ @Override public void initialize(URI uri, Configuration conf) throws IOException { super.initialize(uri, conf); - if (store == null) { - store = createDefaultStore(conf); - } - store.initialize(uri, conf); - setConf(conf); - this.uri = S3xLoginHelper.buildFSURI(uri); - this.workingDir = - new Path("/user", System.getProperty("user.name")).makeQualified(this.uri, this.getWorkingDirectory()); - } - - private static NativeFileSystemStore createDefaultStore(Configuration conf) { - NativeFileSystemStore store = new Jets3tNativeFileSystemStore(); - - RetryPolicy basePolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep( - conf.getInt(S3_NATIVE_MAX_RETRIES_KEY, S3_NATIVE_MAX_RETRIES_DEFAUL), - conf.getLong(S3_NATIVE_SLEEP_TIME_KEY, S3_NATIVE_SLEEP_TIME_DEFAULT), - TimeUnit.SECONDS); - Map, RetryPolicy> exceptionToPolicyMap = - new HashMap, RetryPolicy>(); - exceptionToPolicyMap.put(IOException.class, basePolicy); - exceptionToPolicyMap.put(S3Exception.class, basePolicy); - - RetryPolicy methodPolicy = RetryPolicies.retryByException( - RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); - Map methodNameToPolicyMap = - new HashMap(); - methodNameToPolicyMap.put("storeFile", methodPolicy); - methodNameToPolicyMap.put("rename", methodPolicy); - - return (NativeFileSystemStore) - RetryProxy.create(NativeFileSystemStore.class, store, - methodNameToPolicyMap); - } - - private static String pathToKey(Path path) { - if (path.toUri().getScheme() != null && path.toUri().getPath().isEmpty()) { - // allow uris without trailing slash after bucket to refer to root, - // like s3n://mybucket - return ""; - } - if (!path.isAbsolute()) { - throw new IllegalArgumentException("Path must be absolute: " + path); - } - String ret = path.toUri().getPath().substring(1); // remove initial slash - if (ret.endsWith("/") && (ret.indexOf("/") != ret.length() - 1)) { - ret = ret.substring(0, ret.length() -1); - } - return ret; - } - - private static Path keyToPath(String key) { - return new Path("/" + key); - } - - private Path makeAbsolute(Path path) { - if (path.isAbsolute()) { - return path; - } - return new Path(workingDir, path); - } - - /** - * Check that a Path belongs to this FileSystem. - * Unlike the superclass, this version does not look at authority, - * only hostnames. 
- * @param path to check - * @throws IllegalArgumentException if there is an FS mismatch - */ - @Override - protected void checkPath(Path path) { - S3xLoginHelper.checkPath(getConf(), getUri(), path, getDefaultPort()); - } - - @Override - protected URI canonicalizeUri(URI rawUri) { - return S3xLoginHelper.canonicalizeUri(rawUri, getDefaultPort()); - } - - /** This optional operation is not yet supported. */ - @Override - public FSDataOutputStream append(Path f, int bufferSize, - Progressable progress) throws IOException { - throw new UnsupportedOperationException("Append is not supported " - + "by NativeS3FileSystem"); - } - - @Override - public FSDataOutputStream create(Path f, FsPermission permission, - boolean overwrite, int bufferSize, short replication, long blockSize, - Progressable progress) throws IOException { - - if (exists(f) && !overwrite) { - throw new FileAlreadyExistsException("File already exists: " + f); - } - - if(LOG.isDebugEnabled()) { - LOG.debug("Creating new file '" + f + "' in S3"); - } - Path absolutePath = makeAbsolute(f); - String key = pathToKey(absolutePath); - return new FSDataOutputStream(new NativeS3FsOutputStream(getConf(), store, - key, progress, bufferSize), statistics); - } - - @Override - public boolean delete(Path f, boolean recurse) throws IOException { - FileStatus status; - try { - status = getFileStatus(f); - } catch (FileNotFoundException e) { - if(LOG.isDebugEnabled()) { - LOG.debug("Delete called for '" + f + - "' but file does not exist, so returning false"); - } - return false; - } - Path absolutePath = makeAbsolute(f); - String key = pathToKey(absolutePath); - if (status.isDirectory()) { - if (!recurse && listStatus(f).length > 0) { - throw new IOException("Can not delete " + f + " as is a not empty directory and recurse option is false"); - } - - createParent(f); - - if(LOG.isDebugEnabled()) { - LOG.debug("Deleting directory '" + f + "'"); - } - String priorLastKey = null; - do { - PartialListing listing = store.list(key, S3_MAX_LISTING_LENGTH, priorLastKey, true); - for (FileMetadata file : listing.getFiles()) { - store.delete(file.getKey()); - } - priorLastKey = listing.getPriorLastKey(); - } while (priorLastKey != null); - - try { - store.delete(key + FOLDER_SUFFIX); - } catch (FileNotFoundException e) { - //this is fine, we don't require a marker - } - } else { - if(LOG.isDebugEnabled()) { - LOG.debug("Deleting file '" + f + "'"); - } - createParent(f); - store.delete(key); - } - return true; + throw new IOException(UNSUPPORTED); } @Override public FileStatus getFileStatus(Path f) throws IOException { - Path absolutePath = makeAbsolute(f); - String key = pathToKey(absolutePath); - - if (key.length() == 0) { // root always exists - return newDirectory(absolutePath); - } - - if(LOG.isDebugEnabled()) { - LOG.debug("getFileStatus retrieving metadata for key '" + key + "'"); - } - FileMetadata meta = store.retrieveMetadata(key); - if (meta != null) { - if(LOG.isDebugEnabled()) { - LOG.debug("getFileStatus returning 'file' for key '" + key + "'"); - } - return newFile(meta, absolutePath); - } - if (store.retrieveMetadata(key + FOLDER_SUFFIX) != null) { - if(LOG.isDebugEnabled()) { - LOG.debug("getFileStatus returning 'directory' for key '" + key + - "' as '" + key + FOLDER_SUFFIX + "' exists"); - } - return newDirectory(absolutePath); - } - - if(LOG.isDebugEnabled()) { - LOG.debug("getFileStatus listing key '" + key + "'"); - } - PartialListing listing = store.list(key, 1); - if (listing.getFiles().length > 0 || - 
listing.getCommonPrefixes().length > 0) { - if(LOG.isDebugEnabled()) { - LOG.debug("getFileStatus returning 'directory' for key '" + key + - "' as it has contents"); - } - return newDirectory(absolutePath); - } - - if(LOG.isDebugEnabled()) { - LOG.debug("getFileStatus could not find key '" + key + "'"); - } - throw new FileNotFoundException("No such file or directory '" + absolutePath + "'"); + throw new UnsupportedOperationException(UNSUPPORTED); } @Override public URI getUri() { - return uri; - } - - /** - *

- * If f is a file, this method will make a single call to S3. - * If f is a directory, this method will make a maximum of - * (n / 1000) + 2 calls to S3, where n is the total number of - * files and directories contained directly in f. - *

- */ - @Override - public FileStatus[] listStatus(Path f) throws IOException { - - Path absolutePath = makeAbsolute(f); - String key = pathToKey(absolutePath); - - if (key.length() > 0) { - FileMetadata meta = store.retrieveMetadata(key); - if (meta != null) { - return new FileStatus[] { newFile(meta, absolutePath) }; - } - } - - URI pathUri = absolutePath.toUri(); - Set status = new TreeSet(); - String priorLastKey = null; - do { - PartialListing listing = store.list(key, S3_MAX_LISTING_LENGTH, priorLastKey, false); - for (FileMetadata fileMetadata : listing.getFiles()) { - Path subpath = keyToPath(fileMetadata.getKey()); - String relativePath = pathUri.relativize(subpath.toUri()).getPath(); - - if (fileMetadata.getKey().equals(key + "/")) { - // this is just the directory we have been asked to list - } - else if (relativePath.endsWith(FOLDER_SUFFIX)) { - status.add(newDirectory(new Path( - absolutePath, - relativePath.substring(0, relativePath.indexOf(FOLDER_SUFFIX))))); - } - else { - status.add(newFile(fileMetadata, subpath)); - } - } - for (String commonPrefix : listing.getCommonPrefixes()) { - Path subpath = keyToPath(commonPrefix); - String relativePath = pathUri.relativize(subpath.toUri()).getPath(); - // sometimes the common prefix includes the base dir (HADOOP-13830). - // avoid that problem by detecting it and keeping it out - // of the list - if (!relativePath.isEmpty()) { - status.add(newDirectory(new Path(absolutePath, relativePath))); - } - } - priorLastKey = listing.getPriorLastKey(); - } while (priorLastKey != null); - - if (status.isEmpty() && - key.length() > 0 && - store.retrieveMetadata(key + FOLDER_SUFFIX) == null) { - throw new FileNotFoundException("File " + f + " does not exist."); - } - - return status.toArray(new FileStatus[status.size()]); - } - - private FileStatus newFile(FileMetadata meta, Path path) { - return new FileStatus(meta.getLength(), false, 1, getDefaultBlockSize(), - meta.getLastModified(), path.makeQualified(this.getUri(), this.getWorkingDirectory())); - } - - private FileStatus newDirectory(Path path) { - return new FileStatus(0, true, 1, 0, 0, path.makeQualified(this.getUri(), this.getWorkingDirectory())); - } - - @Override - public boolean mkdirs(Path f, FsPermission permission) throws IOException { - Path absolutePath = makeAbsolute(f); - List paths = new ArrayList(); - do { - paths.add(0, absolutePath); - absolutePath = absolutePath.getParent(); - } while (absolutePath != null); - - boolean result = true; - for (Path path : paths) { - result &= mkdir(path); - } - return result; - } - - private boolean mkdir(Path f) throws IOException { - try { - FileStatus fileStatus = getFileStatus(f); - if (fileStatus.isFile()) { - throw new FileAlreadyExistsException(String.format( - "Can't make directory for path '%s' since it is a file.", f)); - - } - } catch (FileNotFoundException e) { - if(LOG.isDebugEnabled()) { - LOG.debug("Making dir '" + f + "' in S3"); - } - String key = pathToKey(f) + FOLDER_SUFFIX; - store.storeEmptyFile(key); - } - return true; + throw new UnsupportedOperationException(UNSUPPORTED); } @Override public FSDataInputStream open(Path f, int bufferSize) throws IOException { - FileStatus fs = getFileStatus(f); // will throw if the file doesn't exist - if (fs.isDirectory()) { - throw new FileNotFoundException("'" + f + "' is a directory"); - } - LOG.info("Opening '" + f + "' for reading"); - Path absolutePath = makeAbsolute(f); - String key = pathToKey(absolutePath); - return new FSDataInputStream(new BufferedFSInputStream( - new 
NativeS3FsInputStream(store, statistics, store.retrieve(key), key), bufferSize)); + throw new UnsupportedOperationException(UNSUPPORTED); } - - // rename() and delete() use this method to ensure that the parent directory - // of the source does not vanish. - private void createParent(Path path) throws IOException { - Path parent = path.getParent(); - if (parent != null) { - String key = pathToKey(makeAbsolute(parent)); - if (key.length() > 0) { - store.storeEmptyFile(key + FOLDER_SUFFIX); - } - } + + @Override + public FSDataOutputStream create(Path f, + FsPermission permission, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED); } - - + + @Override + public FSDataOutputStream append(Path f, + int bufferSize, + Progressable progress) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED); + } + @Override public boolean rename(Path src, Path dst) throws IOException { - - String srcKey = pathToKey(makeAbsolute(src)); - final String debugPreamble = "Renaming '" + src + "' to '" + dst + "' - "; - - if (srcKey.length() == 0) { - // Cannot rename root of file system - if (LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + - "returning false as cannot rename the root of a filesystem"); - } - return false; - } - - //get status of source - boolean srcIsFile; - try { - srcIsFile = getFileStatus(src).isFile(); - } catch (FileNotFoundException e) { - //bail out fast if the source does not exist - if (LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + "returning false as src does not exist"); - } - return false; - } - // Figure out the final destination - String dstKey = pathToKey(makeAbsolute(dst)); - - try { - boolean dstIsFile = getFileStatus(dst).isFile(); - if (dstIsFile) { - //destination is a file. 
- //you can't copy a file or a directory onto an existing file - //except for the special case of dest==src, which is a no-op - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + - "returning without rename as dst is an already existing file"); - } - //exit, returning true iff the rename is onto self - return srcKey.equals(dstKey); - } else { - //destination exists and is a directory - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + "using dst as output directory"); - } - //destination goes under the dst path, with the name of the - //source entry - dstKey = pathToKey(makeAbsolute(new Path(dst, src.getName()))); - } - } catch (FileNotFoundException e) { - //destination does not exist => the source file or directory - //is copied over with the name of the destination - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + "using dst as output destination"); - } - try { - if (getFileStatus(dst.getParent()).isFile()) { - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + - "returning false as dst parent exists and is a file"); - } - return false; - } - } catch (FileNotFoundException ex) { - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + - "returning false as dst parent does not exist"); - } - return false; - } - } - - //rename to self behavior follows Posix rules and is different - //for directories and files -the return code is driven by src type - if (srcKey.equals(dstKey)) { - //fully resolved destination key matches source: fail - if (LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + "renamingToSelf; returning true"); - } - return true; - } - if (srcIsFile) { - //source is a file; COPY then DELETE - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + - "src is file, so doing copy then delete in S3"); - } - store.copy(srcKey, dstKey); - store.delete(srcKey); - } else { - //src is a directory - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + "src is directory, so copying contents"); - } - //Verify dest is not a child of the parent - if (dstKey.startsWith(srcKey + "/")) { - if (LOG.isDebugEnabled()) { - LOG.debug( - debugPreamble + "cannot rename a directory to a subdirectory of self"); - } - return false; - } - //create the subdir under the destination - store.storeEmptyFile(dstKey + FOLDER_SUFFIX); - - List keysToDelete = new ArrayList(); - String priorLastKey = null; - do { - PartialListing listing = store.list(srcKey, S3_MAX_LISTING_LENGTH, priorLastKey, true); - for (FileMetadata file : listing.getFiles()) { - keysToDelete.add(file.getKey()); - store.copy(file.getKey(), dstKey + file.getKey().substring(srcKey.length())); - } - priorLastKey = listing.getPriorLastKey(); - } while (priorLastKey != null); - - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + - "all files in src copied, now removing src files"); - } - for (String key: keysToDelete) { - store.delete(key); - } - - try { - store.delete(srcKey + FOLDER_SUFFIX); - } catch (FileNotFoundException e) { - //this is fine, we don't require a marker - } - if(LOG.isDebugEnabled()) { - LOG.debug(debugPreamble + "done"); - } - } - - return true; - } - - @Override - public long getDefaultBlockSize() { - return getConf().getLong("fs.s3n.block.size", 64 * 1024 * 1024); + throw new UnsupportedOperationException(UNSUPPORTED); } - /** - * Set the working directory to the given directory. 
- */ @Override - public void setWorkingDirectory(Path newDir) { - workingDir = newDir; + public boolean delete(Path f, boolean recursive) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED); } - + @Override - public Path getWorkingDirectory() { - return workingDir; + public FileStatus[] listStatus(Path f) + throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED); + } + + @Override + public void setWorkingDirectory(Path new_dir) { + throw new UnsupportedOperationException(UNSUPPORTED); } @Override - public String getCanonicalServiceName() { - // Does not support Token - return null; + public Path getWorkingDirectory() { + throw new UnsupportedOperationException(UNSUPPORTED); + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + throw new UnsupportedOperationException(UNSUPPORTED); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/PartialListing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/PartialListing.java deleted file mode 100644 index 82900928229..00000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/PartialListing.java +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - -/** - *

- * Holds information on a directory listing for a - * {@link NativeFileSystemStore}. - * This includes the {@link FileMetadata files} and directories - * (their names) contained in a directory. - *

- *

- * This listing may be returned in chunks, so a priorLastKey - * is provided so that the next chunk may be requested. - *

- * @see NativeFileSystemStore#list(String, int, String) - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -class PartialListing { - - private final String priorLastKey; - private final FileMetadata[] files; - private final String[] commonPrefixes; - - public PartialListing(String priorLastKey, FileMetadata[] files, - String[] commonPrefixes) { - this.priorLastKey = priorLastKey; - this.files = files; - this.commonPrefixes = commonPrefixes; - } - - public FileMetadata[] getFiles() { - return files; - } - - public String[] getCommonPrefixes() { - return commonPrefixes; - } - - public String getPriorLastKey() { - return priorLastKey; - } - -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3Credentials.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3Credentials.java deleted file mode 100644 index 713b149dfa8..00000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3Credentials.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import java.io.IOException; -import java.net.URI; - -import com.google.common.base.Preconditions; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; - -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_AWS_ACCESS_KEY_ID; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_AWS_SECRET_ACCESS_KEY; - -/** - *

- * Extracts AWS credentials from the filesystem URI or configuration. - *

- */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class S3Credentials { - - private String accessKey; - private String secretAccessKey; - - /** - * @param uri bucket URI optionally containing username and password. - * @param conf configuration - * @throws IllegalArgumentException if credentials for S3 cannot be - * determined. - * @throws IOException if credential providers are misconfigured and we have - * to talk to them. - */ - public void initialize(URI uri, Configuration conf) throws IOException { - Preconditions.checkArgument(uri.getHost() != null, - "Invalid hostname in URI " + uri); - - String userInfo = uri.getUserInfo(); - if (userInfo != null) { - int index = userInfo.indexOf(':'); - if (index != -1) { - accessKey = userInfo.substring(0, index); - secretAccessKey = userInfo.substring(index + 1); - } else { - accessKey = userInfo; - } - } - - if (accessKey == null) { - accessKey = conf.getTrimmed(S3_NATIVE_AWS_ACCESS_KEY_ID); - } - if (secretAccessKey == null) { - final char[] pass = conf.getPassword(S3_NATIVE_AWS_SECRET_ACCESS_KEY); - if (pass != null) { - secretAccessKey = (new String(pass)).trim(); - } - } - - final String scheme = uri.getScheme(); - Preconditions.checkArgument(!(accessKey == null && secretAccessKey == null), - "AWS Access Key ID and Secret Access Key must be specified as the " + - "username or password (respectively) of a " + scheme + " URL, or " + - "by setting the " + S3_NATIVE_AWS_ACCESS_KEY_ID + " or " + - S3_NATIVE_AWS_SECRET_ACCESS_KEY + " properties (respectively)."); - Preconditions.checkArgument(accessKey != null, - "AWS Access Key ID must be specified as the username of a " + scheme + - " URL, or by setting the " + S3_NATIVE_AWS_ACCESS_KEY_ID + - " property."); - Preconditions.checkArgument(secretAccessKey != null, - "AWS Secret Access Key must be specified as the password of a " + scheme - + " URL, or by setting the " + S3_NATIVE_AWS_SECRET_ACCESS_KEY + - " property."); - } - - public String getAccessKey() { - return accessKey; - } - - public String getSecretAccessKey() { - return secretAccessKey; - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3NativeFileSystemConfigKeys.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3NativeFileSystemConfigKeys.java deleted file mode 100644 index 7c8b345fe50..00000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3NativeFileSystemConfigKeys.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.s3native; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configuration.DeprecationDelta; -import org.apache.hadoop.fs.CommonConfigurationKeys; - -/** - * This class contains constants for configuration keys used - * in the s3 file system. - * - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class S3NativeFileSystemConfigKeys extends CommonConfigurationKeys { - public static final String S3_NATIVE_BLOCK_SIZE_KEY = "s3native.blocksize"; - public static final long S3_NATIVE_BLOCK_SIZE_DEFAULT = 64*1024*1024; - public static final String S3_NATIVE_REPLICATION_KEY = "s3native.replication"; - public static final short S3_NATIVE_REPLICATION_DEFAULT = 1; - public static final String S3_NATIVE_STREAM_BUFFER_SIZE_KEY = - "s3native.stream-buffer-size"; - public static final int S3_NATIVE_STREAM_BUFFER_SIZE_DEFAULT = 4096; - public static final String S3_NATIVE_BYTES_PER_CHECKSUM_KEY = - "s3native.bytes-per-checksum"; - public static final int S3_NATIVE_BYTES_PER_CHECKSUM_DEFAULT = 512; - public static final String S3_NATIVE_CLIENT_WRITE_PACKET_SIZE_KEY = - "s3native.client-write-packet-size"; - public static final int S3_NATIVE_CLIENT_WRITE_PACKET_SIZE_DEFAULT = 64*1024; - static final String S3_NATIVE_BUFFER_DIR_KEY = "fs.s3n.buffer.dir"; - static final String S3_NATIVE_BUFFER_DIR_DEFAULT = "${hadoop.tmp.dir}/s3n"; - static final String S3_NATIVE_MAX_RETRIES_KEY = "fs.s3n.maxRetries"; - static final int S3_NATIVE_MAX_RETRIES_DEFAUL = 4; - static final String S3_NATIVE_SLEEP_TIME_KEY = "fs.s3n.sleepTimeSeconds"; - static final int S3_NATIVE_SLEEP_TIME_DEFAULT = 10; - static final String S3_NATIVE_AWS_ACCESS_KEY_ID = "fs.s3n.awsAccessKeyId"; - static final String S3_NATIVE_AWS_SECRET_ACCESS_KEY = - "fs.s3n.awsSecretAccessKey"; - - static void addDeprecatedConfigKeys() { - Configuration.addDeprecations(new DeprecationDelta[]{ - new DeprecationDelta("fs.s3.buffer.dir", S3_NATIVE_BUFFER_DIR_KEY), - new DeprecationDelta("fs.s3.maxRetries", S3_NATIVE_MAX_RETRIES_KEY), - new DeprecationDelta("fs.s3.sleepTimeSeconds", S3_NATIVE_SLEEP_TIME_KEY) - }); - } - -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/package.html b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/package.html index 4d3bde936f4..eb2c47174b2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/package.html +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/package.html @@ -23,8 +23,11 @@ A distributed implementation of {@link org.apache.hadoop.fs.FileSystem} for reading and writing files on Amazon S3. -This implementation stores files on S3 in their native form for interoperability +This implementation stored files on S3 in their native form for interoperability with other S3 tools. + +It has been replaced by the S3A client. +

diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md new file mode 100644 index 00000000000..719c5e599fb --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md @@ -0,0 +1,427 @@ + + + +# Working with Encrypted S3 Data + + + + +## Introduction + +The S3A filesystem client supports Amazon S3's Server Side Encryption +for at-rest data encryption. +You should to read up on the [AWS documentation](https://docs.aws.amazon.com/AmazonS3/latest/dev/serv-side-encryption.html) +for S3 Server Side Encryption for up to date information on the encryption mechansims. + + + +When configuring an encryption method in the `core-site.xml`, this will apply cluster wide. +Any new file written will be encrypted with this encryption configuration. +When the S3A client reads a file, S3 will attempt to decrypt it using the mechanism +and keys with which the file was encrypted. + +* It is **NOT** advised to mix and match encryption types in a bucket +* It is much simpler and safer to encrypt with just one type and key per bucket. +* You can use AWS bucket policies to mandate encryption rules for a bucket. +* You can use S3A per-bucket configuration to ensure that S3A clients use encryption +policies consistent with the mandated rules. +* Changing the encryption options on the client does not change how existing +files were encrypted, except when the files are renamed. +* For all mechanisms other than SSE-C, clients do not need any configuration +options set in order to read encrypted data: it is all automatically handled +in S3 itself. + +## How data is encrypted + +AWS S3 supports server-side encryption inside the storage system itself. +When an S3 client uploading data requests data to be encrypted, then an encryption key is used +to encrypt the data as it saved to S3. It remains encrypted on S3 until deleted: +clients cannot change the encryption attributes of an object once uploaded. + +The Amazon AWS SDK also offers client-side encryption, in which all the encoding +and decoding of data is performed on the client. This is *not* supported by +the S3A client. + +The server-side "SSE" encryption is performed with symmetric AES256 encryption; +S3 offers different mechanisms for actually defining the key to use. + + +There are thrre key management mechanisms, which in order of simplicity of use, +are: + +* SSE-S3: an AES256 key is generated in S3, and saved alongside the data. +* SSE-KMS: an AES256 key is generated in S3, and encrypted with a secret key provided +by Amazon's Key Management Service, a key referenced by name in the uploading client. +* SSE-C : the client specifies an actual base64 encoded AES-256 key to be used +to encrypt and decrypt the data. + + +## SSE-S3 Amazon S3-Managed Encryption Keys + +In SSE-S3, all keys and secrets are managed inside S3. This is the simplest encryption mechanism. +There is no extra cost for storing data with this option. + + +### Enabling SSE-S3 + +To write S3-SSE encrypted files, the value of +`fs.s3a.server-side-encryption-algorithm` must be set to that of +the encryption mechanism used in `core-site`; currently only `AES256` is supported. + +```xml + + fs.s3a.server-side-encryption-algorithm + AES256 + +``` + +Once set, all new data will be stored encrypted. There is no need to set this property when downloading data — the data will be automatically decrypted when read using +the Amazon S3-managed key. 
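+
+As a sketch (the bucket name `example-logs` below is purely illustrative), the
+per-bucket configuration mechanism described later in this document can scope
+SSE-S3 to a single bucket instead of the whole cluster:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.example-logs.server-side-encryption-algorithm</name>
+  <value>AES256</value>
+</property>
+```
+
+Only objects written to `s3a://example-logs/` would then be stored with SSE-S3;
+other buckets would keep whatever encryption settings they already have.
+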
+ +To learn more, refer to +[Protecting Data Using Server-Side Encryption with Amazon S3-Managed Encryption Keys (SSE-S3) in AWS documentation](http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html). + + +### SSE-KMS: Amazon S3-KMS Managed Encryption Keys + + +Amazon offers a pay-per-use key management service, [AWS KMS](https://aws.amazon.com/documentation/kms/). +This service can be used to encrypt data on S3 by defining "customer master keys", CMKs, +which can be centrally managed and assigned to specific roles and IAM accounts. + +The AWS KMS [can be used encrypt data on S3uploaded data](http://docs.aws.amazon.com/kms/latest/developerguide/services-s3.html). + +> The AWS KMS service is **not** related to the Key Management Service built into Hadoop (*Hadoop KMS*). The *Hadoop KMS* primarily focuses on + managing keys for *HDFS Transparent Encryption*. Similarly, HDFS encryption is unrelated to S3 data encryption. + +When uploading data encrypted with SSE-KMS, the sequence is as follows. + +1. The S3A client must declare a specific CMK in the property `fs.s3a.server-side-encryption.key`, or leave +it blank to use the default configured for that region. + +1. The S3A client uploads all the data as normal, now including encryption information. + +1. The S3 service encrypts the data with a symmetric key unique to the new object. + +1. The S3 service retrieves the chosen CMK key from the KMS service, and, if the user has +the right to use it, uses it to encrypt the object-specific key. + + +When downloading SSE-KMS encrypte data, the sequence is as follows + +1. The S3A client issues an HTTP GET request to read the data. +1. S3 sees that the data was encrypted with SSE-KMS, and looks up the specific key in the KMS service +1. If and only if the requesting user has been granted permission to use the CMS key does +the KMS service provide S3 with the key. +1. As a result, S3 will only decode the data if the user has been granted access to the key. + + +KMS keys can be managed by an organization's administrators in AWS, including +having access permissions assigned and removed from specific users, groups, and IAM roles. +Only those "principals" with granted rights to a key may access it, +hence only they may encrypt data with the key, *and decrypt data encrypted with it*. +This allows KMS to be used to provide a cryptographically secure access control mechanism for data stores on S3. + + +Each KMS server is region specific, and accordingly, so is each CMK configured. +A CMK defined in one region cannot be used with an S3 bucket in a different region. + + +Notes + +* Callers are charged for every use of a key, both for encrypting the data in uploads + and for decrypting it when reading it back. +* Random-access IO on files may result in multiple GET requests of an object during a read +sequence (especially for columnar data), so may require more than one key retrieval to process a single file, +* The KMS service is throttled: too many requests may cause requests to fail. +* As well as incurring charges, heavy I/O *may* reach IO limits for a customer. If those limits are reached, +they can be increased through the AWS console. 
+ + +### Enabling SSE-KMS + +To enable SSE-KMS, the property `fs.s3a.server-side-encryption-algorithm` must be set to `SSE-KMS` in `core-site`: + +```xml + + fs.s3a.server-side-encryption-algorithm + SSE-KMS + +``` + +The ID of the specific key used to encrypt the data should also be set in the property `fs.s3a.server-side-encryption.key`: + +```xml + + fs.s3a.server-side-encryption.key + arn:aws:kms:us-west-2:360379543683:key/071a86ff-8881-4ba0-9230-95af6d01ca01 + +``` + +Organizations may define a default key in the Amazon KMS; if a default key is set, +then it will be used whenever SSE-KMS encryption is chosen and the value of `fs.s3a.server-side-encryption.key` is empty. + +### the S3A `fs.s3a.encryption.key` key only affects created files + +With SSE-KMS, the S3A client option `fs.s3a.server-side-encryption.key` sets the +key to be used when new files are created. When reading files, this key, +and indeed the value of `fs.s3a.server-side-encryption-algorithme` is ignored: +S3 will attempt to retrieve the key and decrypt the file based on the create-time settings. + +This means that + +* There's no need to configure any client simply reading data. +* It is possible for a client to read data encrypted with one KMS key, and +write it with another. + + +## SSE-C: Server side encryption with a client-supplied key. + +In SSE-C, the client supplies the secret key needed to read and write data. +Every client trying to read or write data must be configured with the same +secret key. + + +SSE-C integration with Hadoop is still stabilizing; issues related to it are still surfacing. +It is already clear that SSE-C with a common key must be used exclusively within +a bucket if it is to be used at all. This is the only way to ensure that path and +directory listings do not fail with "Bad Request" errors. + +### Enabling SSE-C + +To use SSE-C, the configuration option `fs.s3a.server-side-encryption-algorithm` +must be set to `SSE-C`, and a base-64 encoding of the key placed in +`fs.s3a.server-side-encryption.key`. + +```xml + + fs.s3a.server-side-encryption-algorithm + SSE-C + + + + fs.s3a.server-side-encryption.key + SGVscCwgSSdtIHRyYXBwZWQgaW5zaWRlIGEgYmFzZS02NC1jb2RlYyE= + +``` + +All clients must share this same key. + +### The `fs.s3a.encryption.key` value is used to read and write data + +With SSE-C, the S3A client option `fs.s3a.server-side-encryption.key` sets the +key to be used for both reading *and* writing data. + +When reading any file written with SSE-C, the same key must be set +in the property `fs.s3a.server-side-encryption.key`. + +This is unlike SSE-S3 and SSE-KMS, where the information needed to +decode data is kept in AWS infrastructure. + + +### SSE-C Warning + +You need to fully understand how SSE-C works in the S3 +environment before using this encryption type. Please refer to the Server Side +Encryption documentation available from AWS. SSE-C is only recommended for +advanced users with advanced encryption use cases. Failure to properly manage +encryption keys can cause data loss. Currently, the AWS S3 API(and thus S3A) +only supports one encryption key and cannot support decrypting objects during +moves under a previous key to a new destination. It is **NOT** advised to use +multiple encryption keys in a bucket, and is recommended to use one key per +bucket and to not change this key. This is due to when a request is made to S3, +the actual encryption key must be provided to decrypt the object and access the +metadata. 
Since only one encryption key can be provided at a time, S3A will not +pass the correct encryption key to decrypt the data. + + +## Encryption best practises + + +### Mandate encryption through policies + +Because it is up to the clients to enable encryption on new objects, all clients +must be correctly configured in order to guarantee that data is encrypted. + + +To mandate that all data uploaded to a bucket is encrypted, +you can set a [bucket policy](https://aws.amazon.com/blogs/security/how-to-prevent-uploads-of-unencrypted-objects-to-amazon-s3/) +declaring that clients must provide encryption information with all data uploaded. + + +* Mandating an encryption mechanism on newly uploaded data does not encrypt existing data; existing data will retain whatever encryption (if any) applied at the time of creation* + +Here is a policy to mandate `SSE-S3/AES265` encryption on all data uploaded to a bucket. This covers uploads as well as the copy operations which take place when file/directory rename operations are mimicked. + + +```json +{ + "Version": "2012-10-17", + "Id": "EncryptionPolicy", + "Statement": [ + { + "Sid": "RequireEncryptionHeaderOnPut", + "Effect": "Deny", + "Principal": "*", + "Action": [ + "s3:PutObject" + ], + "Resource": "arn:aws:s3:::BUCKET/*", + "Condition": { + "Null": { + "s3:x-amz-server-side-encryption": true + } + } + }, + { + "Sid": "RequireAESEncryptionOnPut", + "Effect": "Deny", + "Principal": "*", + "Action": [ + "s3:PutObject" + ], + "Resource": "arn:aws:s3:::BUCKET/*", + "Condition": { + "StringNotEquals": { + "s3:x-amz-server-side-encryption": "AES256" + } + } + } + ] +} +``` + +To use SSE-KMS, a different restriction must be defined: + + +```json +{ + "Version": "2012-10-17", + "Id": "EncryptionPolicy", + "Statement": [ + { + "Sid": "RequireEncryptionHeaderOnPut", + "Effect": "Deny", + "Principal": "*", + "Action": [ + "s3:PutObject" + ], + "Resource": "arn:aws:s3:::BUCKET/*", + "Condition": { + "Null": { + "s3:x-amz-server-side-encryption": true + } + } + }, + { + "Sid": "RequireKMSEncryptionOnPut", + "Effect": "Deny", + "Principal": "*", + "Action": [ + "s3:PutObject" + ], + "Resource": "arn:aws:s3:::BUCKET/*", + "Condition": { + "StringNotEquals": { + "s3:x-amz-server-side-encryption": "SSE-KMS" + } + } + } + ] +} +``` + +To use one of these policies: + +1. Replace `BUCKET` with the specific name of the bucket being secured. +1. Locate the bucket in the AWS console [S3 section](https://console.aws.amazon.com/s3/home). +1. Select the "Permissions" tab. +1. Select the "Bucket Policy" tab in the permissions section. +1. Paste the edited policy into the form. +1. Save the policy. + +### Use S3a per-bucket configuration to control encryption settings + +In an organisation which has embraced S3 encryption, different buckets inevitably have +different encryption policies, such as different keys for SSE-KMS encryption. +In particular, as different keys need to be named for different regions, unless +you rely on the administrator-managed "default" key for each S3 region, you +will need unique keys. + +S3A's per-bucket configuration enables this. + + +Here, for example, are settings for a bucket in London, `london-stats`: + + +```xml + + fs.s3a.bucket.london-stats.server-side-encryption-algorithm + AES256 + +``` + +This requests SSE-S; if matched with a bucket policy then all data will +be encrypted as it is uploaded. + + +A different bucket can use a different policy +(here SSE-KMS) and, when necessary, declare a key. 
+ +Here is an example bucket in S3 Ireland, which uses SSE-KMS and +a KMS key hosted in the AWS-KMS service in the same region. + + +```xml + + fs.s3a.bucket.ireland-dev.server-side-encryption-algorithm + SSE-KMS + + + + fs.s3a.bucket.ireland-dev.server-side-encryption.key + arn:aws:kms:eu-west-1:98067faff834c:key/071a86ff-8881-4ba0-9230-95af6d01ca01 + + +``` + +Again the approprate bucket policy can be used to guarantee that all callers +will use SSE-KMS; they can even mandata the name of the key used to encrypt +the data, so guaranteeing that access to thee data can be read by everyone +granted access to that key, and nobody without access to it. + + +### Use rename() to encrypt files with new keys + +The encryption of an object is set when it is uploaded. If you want to encrypt +an unencrypted file, or change the SEE-KMS key of a file, the only way to do +so is by copying the object. + +How can you do that from Hadoop? With `rename()`. + +The S3A client mimics a real filesystem's' rename operation by copying all the +source files to the destination paths, then deleting the old ones. +If you do a rename() + +Note: this does not work for SSE-C, because you cannot set a different key +for reading as for writing, and you must supply that key for reading. There +you need to copy one bucket to a different bucket, one with a different key. +Use `distCp`for this, with per-bucket encryption policies. + + +## Troubleshooting Encryption + +The [troubleshooting](./troubleshooting_s3a.html) document covers +stack traces which may surface when working with encrypted data. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index b8d37c633ce..75c638f5831 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -15,40 +15,128 @@ # Hadoop-AWS module: Integration with Amazon Web Services - + -## Overview +**NOTE: Hadoop's `s3:` and `s3n:` connectors have been removed. +Please use `s3a:` as the connector to data hosted in S3 with Apache Hadoop.** -The `hadoop-aws` module provides support for AWS integration. The generated -JAR file, `hadoop-aws.jar` also declares a transitive dependency on all -external artifacts which are needed for this support —enabling downstream -applications to easily use this support. - -To make it part of Apache Hadoop's default classpath, simply make sure that -HADOOP_OPTIONAL_TOOLS in hadoop-env.sh has 'hadoop-aws' in the list. - -### Features - -**NOTE: `s3:` has been phased out; `s3n:`, while -distributed should now be considered deprecated. -Please use `s3a:` as the connector to data hosted in S3.** - -1. The second-generation, `s3n:` filesystem, making it easy to share -data between hadoop and other applications via the S3 object store. -1. The third generation, `s3a:` filesystem. Designed to be a switch in -replacement for `s3n:`, this filesystem binding supports larger files and promises -higher performance. - -The specifics of using these filesystems are documented in this section. 
+**Consult the [s3n documentation](./s3n.html) for migration instructions.** See also: -* [Testing](testing.html) -* [Troubleshooting S3a](troubleshooting_s3a.html) -* [S3Guard](s3guard.html) +* [Encryption](./encryption.html) +* [S3Guard](./s3guard.html) +* [Troubleshooting](./troubleshooting_s3a.html) +* [Testing](./testing.html) -### Warning #1: Object Stores are not filesystems +## Overview + +Apache Hadoop's `hadoop-aws` module provides support for AWS integration. +applications to easily use this support. + +To include the S3A client in Apache Hadoop's default classpath: + +1. Make sure that`HADOOP_OPTIONAL_TOOLS` in `hadoop-env.sh` includes `hadoop-aws` +in its list of optional modules to add in the classpath. + +1. For client side interaction, you can declare that relevant JARs must be loaded +in your `~/.hadooprc` file: + + hadoop_add_to_classpath_tools hadoop-aws + +The settings in this file does not propagate to deployed applications, but it will +work for local clients such as the `hadoop fs` command. + + +## Introducing the Hadoop S3A client. + +Hadoop's "S3A" client offers high-performance IO against Amazon S3 object store +and compatible implementations. + +* Directly reads and writes S3 objects. +* Compatible with standard S3 clients. +* Compatible with files created by the older `s3n://` client and Amazon EMR's `s3://` client. +* Supports partitioned uploads for many-GB objects. +* Offers a high-performance random IO mode for working with columnar data such +as Apache ORC and Apache Parquet files. +* Uses Amazon's Java S3 SDK with support for latest S3 features and authentication +schemes. +* Supports authentication via: environment variables, Hadoop configuration +properties, the Hadoop key management store and IAM roles. +* Supports per-bucket configuration. +* With [S3Guard](./s3guard.html), adds high performance and consistent metadata/ +directory read operations. This delivers consistency as well as speed. +* Supports S3 "Server Side Encryption" for both reading and writing: + SSE-S3, SSE-KMS and SSE-C +* Instrumented with Hadoop metrics. +* Actively maintained by the open source community. + + +### Other S3 Connectors + +There other Hadoop connectors to S3. Only S3A is actively maintained by +the Hadoop project itself. + +1. Apache's Hadoop's original `s3://` client. This is no longer included in Hadoop. +1. Amazon EMR's `s3://` client. This is from the Amazon EMR team, who actively +maintain it. +1. Apache's Hadoop's [`s3n:` filesystem client](./s3n.html). + This connectore is no longer available: users must migrate to the newer `s3a:` client. + + +## Getting Started + +S3A depends upon two JARs, alongside `hadoop-common` and its dependencies. + +* `hadoop-aws` JAR. +* `aws-java-sdk-bundle` JAR. + +The versions of `hadoop-common` and `hadoop-aws` must be identical. + +To import the libraries into a Maven build, add `hadoop-aws` JAR to the +build dependencies; it will pull in a compatible aws-sdk JAR. + +The `hadoop-aws` JAR *does not* declare any dependencies other than that +dependencies unique to it, the AWS SDK JAR. This is simplify excluding/tuning +Hadoop dependency JARs in downstream applications. The `hadoop-client` or +`hadoop-common` dependency must be declared + + +```xml + + + 3.0.0 + + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.apache.hadoop + hadoop-aws + ${hadoop.version} + + +``` + +## Warnings + +Amazon S3 is an example of "an object store". 
In order to achieve scalability +and especially high availability, S3 has —as many other cloud object stores have +done— relaxed some of the constraints which classic "POSIX" filesystems promise. + +The [S3Guard](./s3guard.html) feature attempts to address some of these, but +it cannot do so completely. Do read these warnings and consider how +they apply. + +For further discussion on these topics, please consult +[The Hadoop FileSystem API Definition](../../../hadoop-project-dist/hadoop-common/filesystem/index.html). + +### Warning #1: S3 Consistency model Amazon S3 is an example of "an object store". In order to achieve scalability and especially high availability, S3 has —as many other cloud object stores have @@ -65,23 +153,49 @@ recursive file-by-file operations. They take time at least proportional to the number of files, during which time partial updates may be visible. If the operations are interrupted, the filesystem is left in an intermediate state. -### Warning #2: Object stores don't track modification times of directories -Features of Hadoop relying on this can have unexpected behaviour. E.g. the -AggregatedLogDeletionService of YARN will not remove the appropriate logfiles. +### Warning #2: Directories are mimiced + +The S3A clients mimics directories by: + +1. Creating a stub entry after a `mkdirs` call, deleting it when a file +is added anywhere underneath +1. When listing a directory, searching for all objects whose path starts with +the directory path, and returning them as the listing. +1. When renaming a directory, taking such a listing and asking S3 to copying the +individual objects to new objects with the destination filenames. +1. When deleting a directory, taking such a listing and deleting the entries in +batches. +1. When renaming or deleting directories, taking such a listing and working +on the individual files. + + +Here are some of the consequences: + +* Directories may lack modification times. +Parts of Hadoop relying on this can have unexpected behaviour. E.g. the +`AggregatedLogDeletionService` of YARN will not remove the appropriate logfiles. +* Directory listing can be slow. Use `listFiles(path, recursive)` for high +performance recursive listings whenever possible. +* The time to rename a directory is proportional to the number of files +underneath it (directory or indirectly) and the size of the files. (The copyis +executed inside the S3 storage, so the time is independent of the bandwidth +from client to S3). +* Directory renames are not atomic: they can fail partway through, and callers +cannot safely rely on atomic renames as part of a commit algorithm. +* Directory deletion is not atomic and can fail partway through. +* It is possible to create files under files if the caller tries hard. + -For further discussion on these topics, please consult -[The Hadoop FileSystem API Definition](../../../hadoop-project-dist/hadoop-common/filesystem/index.html). ### Warning #3: Object stores have differerent authorization models The object authorization model of S3 is much different from the file -authorization model of HDFS and traditional file systems. It is not feasible to -persist file ownership and permissions in S3, so S3A reports stub information -from APIs that would query this metadata: +authorization model of HDFS and traditional file systems. +The S3A client simply reports stub information from APIs that would query this metadata: * File owner is reported as the current user. -* File group also is reported as the current user. 
Prior to Apache Hadoop +* File group also is reported as the current user. Prior to Apache Hadoop 2.8.0, file group was reported as empty (no group associated), which is a potential incompatibility problem for scripts that perform positional parsing of shell output and other clients that expect to find a well-defined group. @@ -93,10 +207,7 @@ Users authenticate to an S3 bucket using AWS credentials. It's possible that object ACLs have been defined to enforce authorization at the S3 side, but this happens entirely within the S3 service, not within the S3A implementation. -For further discussion on these topics, please consult -[The Hadoop FileSystem API Definition](../../../hadoop-project-dist/hadoop-common/filesystem/index.html). - -### Warning #4: Your AWS credentials are valuable +### Warning #4: Your AWS credentials are very, very valuable Your AWS credentials not only pay for services, they offer read and write access to the data. Anyone with the credentials can not only read your datasets @@ -107,250 +218,100 @@ Do not inadvertently share these credentials through means such as 1. Checking in to SCM any configuration files containing the secrets. 1. Logging them to a console, as they invariably end up being seen. 1. Defining filesystem URIs with the credentials in the URL, such as -`s3a://AK0010:secret@landsat/`. They will end up in logs and error messages. +`s3a://AK0010:secret@landsat-pds/`. They will end up in logs and error messages. 1. Including the secrets in bug reports. If you do any of these: change your credentials immediately! -### Warning #5: The S3 client provided by Amazon EMR are not from the Apache Software foundation, and are only supported by Amazon. +### Warning #5: The S3A client cannot be used on Amazon EMR -Specifically: on Amazon EMR, s3a is not supported, and amazon recommend -a different filesystem implementation. If you are using Amazon EMR, follow -these instructions —and be aware that all issues related to S3 integration -in EMR can only be addressed by Amazon themselves: please raise your issues -with them. +On Amazon EMR `s3a://` URLs are not supported; Amazon provide +their own filesystem client, `s3://`. +If you are using Amazon EMR, follow their instructions for use —and be aware +that all issues related to S3 integration in EMR can only be addressed by Amazon +themselves: please raise your issues with them. -## S3N +Equally importantly: much of this document does not apply to the EMR `s3://` client. +Pleae consult +[the EMR storage documentation](http://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-plan-file-systems.html) +instead. -S3N was the first S3 Filesystem client which used "native" S3 objects, hence -the schema `s3n://`. +## Authenticating with S3 -### Features +Except when interacting with public S3 buckets, the S3A client +needs the credentials needed to interact with buckets. -* Directly reads and writes S3 objects. -* Compatible with standard S3 clients. -* Supports partitioned uploads for many-GB objects. -* Available across all Hadoop 2.x releases. - -The S3N filesystem client, while widely used, is no longer undergoing -active maintenance except for emergency security issues. There are -known bugs, especially: it reads to end of a stream when closing a read; -this can make `seek()` slow on large files. 
The reason there has been no -attempt to fix this is that every upgrade of the Jets3t library, while -fixing some problems, has unintentionally introduced new ones in either the changed -Hadoop code, or somewhere in the Jets3t/Httpclient code base. -The number of defects remained constant, they merely moved around. - -By freezing the Jets3t jar version and avoiding changes to the code, -we reduce the risk of making things worse. - -The S3A filesystem client can read all files created by S3N. Accordingly -it should be used wherever possible. - - -### Dependencies - -* `jets3t` jar -* `commons-codec` jar -* `commons-logging` jar -* `httpclient` jar -* `httpcore` jar -* `java-xmlbuilder` jar - - -### Authentication properties - - - fs.s3n.awsAccessKeyId - AWS access key ID - - - - fs.s3n.awsSecretAccessKey - AWS secret key - - -### Other properties - - - fs.s3n.buffer.dir - ${hadoop.tmp.dir}/s3 - Determines where on the local filesystem the s3n: filesystem - should store files before sending them to S3 - (or after retrieving them from S3). - - - - - fs.s3n.maxRetries - 4 - The maximum number of retries for reading or writing files to - S3, before we signal failure to the application. - - - - - fs.s3n.sleepTimeSeconds - 10 - The number of seconds to sleep between each S3 retry. - - - - - fs.s3n.block.size - 67108864 - Block size to use when reading files using the native S3 - filesystem (s3n: URIs). - - - - fs.s3n.multipart.uploads.enabled - false - Setting this property to true enables multiple uploads to - native S3 filesystem. When uploading a file, it is split into blocks - if the size is larger than fs.s3n.multipart.uploads.block.size. - - - - - fs.s3n.multipart.uploads.block.size - 67108864 - The block size for multipart uploads to native S3 filesystem. - Default size is 64MB. - - - - - fs.s3n.multipart.copy.block.size - 5368709120 - The block size for multipart copy in native S3 filesystem. - Default size is 5GB. - - - - - fs.s3n.server-side-encryption-algorithm - - Specify a server-side encryption algorithm for S3. - Unset by default, and the only other currently allowable value is AES256. - - - -## S3A - - -The S3A filesystem client, prefix `s3a://`, is the S3 client undergoing -active development and maintenance. -While this means that there is a bit of instability -of configuration options and behavior, it also means -that the code is getting better in terms of reliability, performance, -monitoring and other features. - -### Features - -* Directly reads and writes S3 objects. -* Compatible with standard S3 clients. -* Can read data created with S3N. -* Can write data back that is readable by S3N. (Note: excluding encryption). -* Supports partitioned uploads for many-GB objects. -* Instrumented with Hadoop metrics. -* Performance optimized operations, including `seek()` and `readFully()`. -* Uses Amazon's Java S3 SDK with support for latest S3 features and authentication -schemes. -* Supports authentication via: environment variables, Hadoop configuration -properties, the Hadoop key management store and IAM roles. -* Supports S3 "Server Side Encryption" for both reading and writing. -* Supports proxies -* Test suites includes distcp and suites in downstream projects. -* Available since Hadoop 2.6; considered production ready in Hadoop 2.7. -* Actively maintained. -* Supports per-bucket configuration. - -S3A is now the recommended client for working with S3 objects. It is also the -one where patches for functionality and performance are very welcome. 
- -### Dependencies - -* `hadoop-aws` jar. -* `aws-java-sdk-s3` jar. -* `aws-java-sdk-core` jar. -* `aws-java-sdk-kms` jar. -* `joda-time` jar; use version 2.8.1 or later. -* `httpclient` jar. -* Jackson `jackson-core`, `jackson-annotations`, `jackson-databind` jars. - -### S3A Authentication methods - -S3A supports multiple authentication mechanisms, and can be configured as to -which mechanisms to use, and the order to use them. Custom implementations +The client supports multiple authentication mechanisms and can be configured as to +which mechanisms to use, and their order of use. Custom implementations of `com.amazonaws.auth.AWSCredentialsProvider` may also be used. ### Authentication properties - - fs.s3a.access.key - AWS access key ID. - Omit for IAM role-based or provider-based authentication. - +```xml + + fs.s3a.access.key + AWS access key ID. + Omit for IAM role-based or provider-based authentication. + - - fs.s3a.secret.key - AWS secret key. - Omit for IAM role-based or provider-based authentication. - + + fs.s3a.secret.key + AWS secret key. + Omit for IAM role-based or provider-based authentication. + - - fs.s3a.aws.credentials.provider - - Comma-separated class names of credential provider classes which implement - com.amazonaws.auth.AWSCredentialsProvider. + + fs.s3a.aws.credentials.provider + + Comma-separated class names of credential provider classes which implement + com.amazonaws.auth.AWSCredentialsProvider. - These are loaded and queried in sequence for a valid set of credentials. - Each listed class must implement one of the following means of - construction, which are attempted in order: - 1. a public constructor accepting java.net.URI and - org.apache.hadoop.conf.Configuration, - 2. a public static method named getInstance that accepts no - arguments and returns an instance of - com.amazonaws.auth.AWSCredentialsProvider, or - 3. a public default constructor. + These are loaded and queried in sequence for a valid set of credentials. + Each listed class must implement one of the following means of + construction, which are attempted in order: + 1. a public constructor accepting java.net.URI and + org.apache.hadoop.conf.Configuration, + 2. a public static method named getInstance that accepts no + arguments and returns an instance of + com.amazonaws.auth.AWSCredentialsProvider, or + 3. a public default constructor. - Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows - anonymous access to a publicly accessible S3 bucket without any credentials. - Please note that allowing anonymous access to an S3 bucket compromises - security and therefore is unsuitable for most use cases. It can be useful - for accessing public data sets without requiring AWS credentials. + Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows + anonymous access to a publicly accessible S3 bucket without any credentials. + Please note that allowing anonymous access to an S3 bucket compromises + security and therefore is unsuitable for most use cases. It can be useful + for accessing public data sets without requiring AWS credentials. - If unspecified, then the default list of credential provider classes, - queried in sequence, is: - 1. org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider: supports - static configuration of AWS access key ID and secret access key. - See also fs.s3a.access.key and fs.s3a.secret.key. - 2. 
com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports - configuration of AWS access key ID and secret access key in - environment variables named AWS_ACCESS_KEY_ID and - AWS_SECRET_ACCESS_KEY, as documented in the AWS SDK. - 3. com.amazonaws.auth.InstanceProfileCredentialsProvider: supports use - of instance profile credentials if running in an EC2 VM. - - + If unspecified, then the default list of credential provider classes, + queried in sequence, is: + 1. org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider: supports + static configuration of AWS access key ID and secret access key. + See also fs.s3a.access.key and fs.s3a.secret.key. + 2. com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports + configuration of AWS access key ID and secret access key in + environment variables named AWS_ACCESS_KEY_ID and + AWS_SECRET_ACCESS_KEY, as documented in the AWS SDK. + 3. com.amazonaws.auth.InstanceProfileCredentialsProvider: supports use + of instance profile credentials if running in an EC2 VM. + + - - fs.s3a.session.token - - Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider - as one of the providers. - - + + fs.s3a.session.token + + Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider + as one of the providers. + + +``` - -#### Authenticating via environment variables +### Authenticating via the AWS Environment Variables S3A supports configuration via [the standard AWS environment variables](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-environment). The core environment variables are for the access key and associated secret: -``` +```bash export AWS_ACCESS_KEY_ID=my.aws.key export AWS_SECRET_ACCESS_KEY=my.secret.key ``` @@ -359,7 +320,7 @@ If the environment variable `AWS_SESSION_TOKEN` is set, session authentication using "Temporary Security Credentials" is enabled; the Key ID and secret key must be set to the credentials for that specific sesssion. -``` +```bash export AWS_SESSION_TOKEN=SECRET-SESSION-TOKEN export AWS_ACCESS_KEY_ID=SESSION-ACCESS-KEY export AWS_SECRET_ACCESS_KEY=SESSION-SECRET-KEY @@ -369,14 +330,13 @@ These environment variables can be used to set the authentication credentials instead of properties in the Hadoop configuration. *Important:* -These environment variables are not propagated from client to server when +These environment variables are generally not propagated from client to server when YARN applications are launched. That is: having the AWS environment variables set when an application is launched will not permit the launched application to access S3 resources. The environment variables must (somehow) be set on the hosts/processes where the work is executed. - -#### Changing Authentication Providers +### Changing Authentication Providers The standard way to authenticate is with an access key and secret key using the properties in the configuration file. @@ -421,7 +381,7 @@ set up in the authentication chain: | `com.amazonaws.auth.EnvironmentVariableCredentialsProvider`| AWS Environment Variables | -*EC2 Metadata Credentials with `InstanceProfileCredentialsProvider`* +### EC2 IAM Metadata Authentication with `InstanceProfileCredentialsProvider` Applications running in EC2 may associate an IAM role with the VM and query the [EC2 Instance Metadata Service](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) @@ -429,7 +389,7 @@ for credentials to access S3. 
Within the AWS SDK, this functionality is provided by `InstanceProfileCredentialsProvider`, which internally enforces a singleton instance in order to prevent throttling problem. -*Session Credentials with `TemporaryAWSCredentialsProvider`* +### Using Session Credentials with `TemporaryAWSCredentialsProvider` [Temporary Security Credentials](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html) can be obtained from the Amazon Security Token Service; these @@ -470,7 +430,7 @@ The lifetime of session credentials are fixed when the credentials are issued; once they expire the application will no longer be able to authenticate to AWS. -*Anonymous Login with `AnonymousAWSCredentialsProvider`* +### Anonymous Login with `AnonymousAWSCredentialsProvider` Specifying `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` allows anonymous access to a publicly accessible S3 bucket without any credentials. @@ -511,10 +471,12 @@ supports the secret key in `fs.s3a.access.key` and token in `fs.s3a.secret.key` values. It does not support authentication with logins credentials declared in the URLs. - - fs.s3a.aws.credentials.provider - org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider - +```xml + + fs.s3a.aws.credentials.provider + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + +``` Apart from its lack of support of user:password details being included in filesystem URLs (a dangerous practise that is strongly discouraged), this provider acts @@ -522,17 +484,18 @@ exactly at the basic authenticator used in the default authentication chain. This means that the default S3A authentication chain can be defined as - - fs.s3a.aws.credentials.provider - - org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, - com.amazonaws.auth.EnvironmentVariableCredentialsProvider, - com.amazonaws.auth.InstanceProfileCredentialsProvider - - +```xml + + fs.s3a.aws.credentials.provider + + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, + com.amazonaws.auth.EnvironmentVariableCredentialsProvider, + com.amazonaws.auth.InstanceProfileCredentialsProvider + + +``` - -#### Protecting the AWS Credentials +### Protecting the AWS Credentials To protect the access/secret keys from prying eyes, it is recommended that you use either IAM role-based authentication (such as EC2 instance profile) or @@ -541,7 +504,7 @@ through configuration. The following describes using the latter for AWS credentials in the S3A FileSystem. -##### Storing secrets with Hadoop Credential Providers +## Storing secrets with Hadoop Credential Providers The Hadoop Credential Provider Framework allows secure "Credential Providers" to keep secrets outside Hadoop configuration files, storing them in encrypted @@ -557,7 +520,7 @@ For additional reading on the Hadoop Credential Provider API see: [Credential Provider API](../../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). -###### Create a credential file +### Step 1: Create a credential file A credential file can be created on any Hadoop filesystem; when creating one on HDFS or a Unix filesystem the permissions are automatically set to keep the file @@ -585,7 +548,7 @@ fs.s3a.access.key ``` At this point, the credentials are ready for use. 
-###### Configure the `hadoop.security.credential.provider.path` property +### Step 2: Configure the `hadoop.security.credential.provider.path` property The URL to the provider must be set in the configuration property `hadoop.security.credential.provider.path`, either on the command line or @@ -625,18 +588,16 @@ Supporting a separate list in an `fs.s3a.` prefix permits per-bucket configurati of credential files. -###### Using the credentials +### Using secrets from credential providers Once the provider is set in the Hadoop configuration, hadoop commands work exactly as if the secrets were in an XML file. ```bash - hadoop distcp \ hdfs://nn1.example.com:9001/user/backup/007020615 s3a://glacier1/ hadoop fs -ls s3a://glacier1/ - ``` The path to the provider can also be set on the command line: @@ -649,253 +610,272 @@ hadoop distcp \ hadoop fs \ -D fs.s3a.security.credential.provider.path=jceks://hdfs@nn1.example.com:9001/user/backup/s3.jceks \ -ls s3a://glacier1/ - ``` Because the provider path is not itself a sensitive secret, there is no risk from placing its declaration on the command line. -### Other properties +## Genaral S3A Client configuration - - fs.s3a.connection.maximum - 15 - Controls the maximum number of simultaneous connections to S3. - +All S3A client options are configured with options with the prefix `fs.s3a.`. - - fs.s3a.connection.ssl.enabled - true - Enables or disables SSL connections to S3. - +The client supports Per-bucket configuration +to allow different buckets to override the shared settings. This is commonly +used to change the endpoint, encryption and authentication mechanisms of buckets. +S3Guard options, various minor options. - - fs.s3a.endpoint - AWS S3 endpoint to connect to. An up-to-date list is - provided in the AWS Documentation: regions and endpoints. Without this - property, the standard region (s3.amazonaws.com) is assumed. - - +Here are the S3A properties for use in production. The S3Guard options are +documented in the [S3Guard documenents](./s3guard.html); some testing-related +options are covered in [Testing](./testing.md). - - fs.s3a.path.style.access - false - Enable S3 path style access ie disabling the default virtual hosting behaviour. - Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting. - - +```xml + + fs.s3a.connection.maximum + 15 + Controls the maximum number of simultaneous connections to S3. + - - fs.s3a.proxy.host - Hostname of the (optional) proxy server for S3 connections. - + + fs.s3a.connection.ssl.enabled + true + Enables or disables SSL connections to S3. + - - fs.s3a.proxy.port - Proxy server port. If this property is not set - but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with - the value of fs.s3a.connection.ssl.enabled). - + + fs.s3a.endpoint + AWS S3 endpoint to connect to. An up-to-date list is + provided in the AWS Documentation: regions and endpoints. Without this + property, the standard region (s3.amazonaws.com) is assumed. + + - - fs.s3a.proxy.username - Username for authenticating with proxy server. - + + fs.s3a.path.style.access + false + Enable S3 path style access ie disabling the default virtual hosting behaviour. + Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting. + + - - fs.s3a.proxy.password - Password for authenticating with proxy server. - + + fs.s3a.proxy.host + Hostname of the (optional) proxy server for S3 connections. 
+ - - fs.s3a.proxy.domain - Domain for authenticating with proxy server. - + + fs.s3a.proxy.port + Proxy server port. If this property is not set + but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with + the value of fs.s3a.connection.ssl.enabled). + - - fs.s3a.proxy.workstation - Workstation for authenticating with proxy server. - + + fs.s3a.proxy.username + Username for authenticating with proxy server. + - - fs.s3a.attempts.maximum - 20 - How many times we should retry commands on transient errors. - + + fs.s3a.proxy.password + Password for authenticating with proxy server. + - - fs.s3a.connection.establish.timeout - 5000 - Socket connection setup timeout in milliseconds. - + + fs.s3a.proxy.domain + Domain for authenticating with proxy server. + - - fs.s3a.connection.timeout - 200000 - Socket connection timeout in milliseconds. - + + fs.s3a.proxy.workstation + Workstation for authenticating with proxy server. + - - fs.s3a.paging.maximum - 5000 - How many keys to request from S3 when doing - directory listings at a time. - + + fs.s3a.attempts.maximum + 20 + How many times we should retry commands on transient errors. + - - fs.s3a.threads.max - 10 - Maximum number of concurrent active (part)uploads, - which each use a thread from the threadpool. - + + fs.s3a.connection.establish.timeout + 5000 + Socket connection setup timeout in milliseconds. + - - fs.s3a.socket.send.buffer - 8192 - Socket send buffer hint to amazon connector. Represented in bytes. - + + fs.s3a.connection.timeout + 200000 + Socket connection timeout in milliseconds. + - - fs.s3a.socket.recv.buffer - 8192 - Socket receive buffer hint to amazon connector. Represented in bytes. - + + fs.s3a.paging.maximum + 5000 + How many keys to request from S3 when doing + directory listings at a time. + - - fs.s3a.threads.keepalivetime - 60 - Number of seconds a thread can be idle before being - terminated. - + + fs.s3a.threads.max + 10 + Maximum number of concurrent active (part)uploads, + which each use a thread from the threadpool. + - - fs.s3a.max.total.tasks - 5 - Number of (part)uploads allowed to the queue before - blocking additional uploads. - + + fs.s3a.socket.send.buffer + 8192 + Socket send buffer hint to amazon connector. Represented in bytes. + - - fs.s3a.multipart.size - 100M - How big (in bytes) to split upload or copy operations up into. - A suffix from the set {K,M,G,T,P} may be used to scale the numeric value. - - + + fs.s3a.socket.recv.buffer + 8192 + Socket receive buffer hint to amazon connector. Represented in bytes. + - - fs.s3a.multipart.threshold - 2147483647 - How big (in bytes) to split upload or copy operations up into. - This also controls the partition size in renamed files, as rename() involves - copying the source file(s). - A suffix from the set {K,M,G,T,P} may be used to scale the numeric value. - - + + fs.s3a.threads.keepalivetime + 60 + Number of seconds a thread can be idle before being + terminated. + - - fs.s3a.multiobjectdelete.enable - true - When enabled, multiple single-object delete requests are replaced by - a single 'delete multiple objects'-request, reducing the number of requests. - Beware: legacy S3-compatible object stores might not support this request. - - + + fs.s3a.max.total.tasks + 5 + Number of (part)uploads allowed to the queue before + blocking additional uploads. + - - fs.s3a.acl.default - Set a canned ACL for newly created and copied objects. 
Value may be Private, - PublicRead, PublicReadWrite, AuthenticatedRead, LogDeliveryWrite, BucketOwnerRead, - or BucketOwnerFullControl. - + + fs.s3a.multipart.size + 100M + How big (in bytes) to split upload or copy operations up into. + A suffix from the set {K,M,G,T,P} may be used to scale the numeric value. + + - - fs.s3a.multipart.purge - false - True if you want to purge existing multipart uploads that may not have been - completed/aborted correctly - + + fs.s3a.multipart.threshold + 2147483647 + How big (in bytes) to split upload or copy operations up into. + This also controls the partition size in renamed files, as rename() involves + copying the source file(s). + A suffix from the set {K,M,G,T,P} may be used to scale the numeric value. + + - - fs.s3a.multipart.purge.age - 86400 - Minimum age in seconds of multipart uploads to purge - + + fs.s3a.multiobjectdelete.enable + true + When enabled, multiple single-object delete requests are replaced by + a single 'delete multiple objects'-request, reducing the number of requests. + Beware: legacy S3-compatible object stores might not support this request. + + - - fs.s3a.signing-algorithm - Override the default signing algorithm so legacy - implementations can still be used - + + fs.s3a.acl.default + Set a canned ACL for newly created and copied objects. Value may be Private, + PublicRead, PublicReadWrite, AuthenticatedRead, LogDeliveryWrite, BucketOwnerRead, + or BucketOwnerFullControl. + - - fs.s3a.server-side-encryption-algorithm - Specify a server-side encryption algorithm for s3a: file system. - Unset by default. It supports the following values: 'AES256' (for SSE-S3), 'SSE-KMS' - and 'SSE-C' - - + + fs.s3a.multipart.purge + false + True if you want to purge existing multipart uploads that may not have been + completed/aborted correctly + - - fs.s3a.server-side-encryption.key - Specific encryption key to use if fs.s3a.server-side-encryption-algorithm - has been set to 'SSE-KMS' or 'SSE-C'. In the case of SSE-C, the value of this property - should be the Base64 encoded key. If you are using SSE-KMS and leave this property empty, - you'll be using your default's S3 KMS key, otherwise you should set this property to - the specific KMS key id. - + + fs.s3a.multipart.purge.age + 86400 + Minimum age in seconds of multipart uploads to purge + - - fs.s3a.buffer.dir - ${hadoop.tmp.dir}/s3a - Comma separated list of directories that will be used to buffer file - uploads to. No effect if fs.s3a.fast.upload is true. - + + fs.s3a.signing-algorithm + Override the default signing algorithm so legacy + implementations can still be used + - - fs.s3a.block.size - 32M - Block size to use when reading files using s3a: file system. - - + + fs.s3a.server-side-encryption-algorithm + Specify a server-side encryption algorithm for s3a: file system. + Unset by default. It supports the following values: 'AES256' (for SSE-S3), 'SSE-KMS' + and 'SSE-C' + + - - fs.s3a.user.agent.prefix - - - Sets a custom value that will be prepended to the User-Agent header sent in - HTTP requests to the S3 back-end by S3AFileSystem. The User-Agent header - always includes the Hadoop version number followed by a string generated by - the AWS SDK. An example is "User-Agent: Hadoop 2.8.0, aws-sdk-java/1.10.6". - If this optional property is set, then its value is prepended to create a - customized User-Agent. 
For example, if this configuration property was set - to "MyApp", then an example of the resulting User-Agent would be - "User-Agent: MyApp, Hadoop 2.8.0, aws-sdk-java/1.10.6". - - + + fs.s3a.server-side-encryption.key + Specific encryption key to use if fs.s3a.server-side-encryption-algorithm + has been set to 'SSE-KMS' or 'SSE-C'. In the case of SSE-C, the value of this property + should be the Base64 encoded key. If you are using SSE-KMS and leave this property empty, + you'll be using your default's S3 KMS key, otherwise you should set this property to + the specific KMS key id. + - - fs.s3a.impl - org.apache.hadoop.fs.s3a.S3AFileSystem - The implementation class of the S3A Filesystem - + + fs.s3a.buffer.dir + ${hadoop.tmp.dir}/s3a + Comma separated list of directories that will be used to buffer file + uploads to. + - - fs.AbstractFileSystem.s3a.impl - org.apache.hadoop.fs.s3a.S3A - The implementation class of the S3A AbstractFileSystem. - + + fs.s3a.block.size + 32M + Block size to use when reading files using s3a: file system. + + - - fs.s3a.readahead.range - 64K - Bytes to read ahead during a seek() before closing and - re-opening the S3 HTTP connection. This option will be overridden if - any call to setReadahead() is made to an open stream. - + + fs.s3a.user.agent.prefix + + + Sets a custom value that will be prepended to the User-Agent header sent in + HTTP requests to the S3 back-end by S3AFileSystem. The User-Agent header + always includes the Hadoop version number followed by a string generated by + the AWS SDK. An example is "User-Agent: Hadoop 2.8.0, aws-sdk-java/1.10.6". + If this optional property is set, then its value is prepended to create a + customized User-Agent. For example, if this configuration property was set + to "MyApp", then an example of the resulting User-Agent would be + "User-Agent: MyApp, Hadoop 2.8.0, aws-sdk-java/1.10.6". + + -### Configuring different S3 buckets + + fs.s3a.impl + org.apache.hadoop.fs.s3a.S3AFileSystem + The implementation class of the S3A Filesystem + + + + fs.AbstractFileSystem.s3a.impl + org.apache.hadoop.fs.s3a.S3A + The implementation class of the S3A AbstractFileSystem. + + + + fs.s3a.readahead.range + 64K + Bytes to read ahead during a seek() before closing and + re-opening the S3 HTTP connection. This option will be overridden if + any call to setReadahead() is made to an open stream. + + + + fs.s3a.list.version + 2 + Select which version of the S3 SDK's List Objects API to use. + Currently support 2 (default) and 1 (older API). + +``` + +## Configuring different S3 buckets with Per-Bucket Configuration Different S3 buckets can be accessed with different S3A client configurations. This allows for different endpoints, data read and write strategies, as well @@ -918,9 +898,10 @@ role information available when deployed in Amazon EC2. ``` -This will be the default authentication mechanism for S3A buckets. +This will become the default authentication mechanism for S3A buckets. 
-A bucket `s3a://nightly/` used for nightly data uses a session key: +A bucket `s3a://nightly/` used for nightly data can then be given +a session key: ```xml @@ -944,7 +925,7 @@ A bucket `s3a://nightly/` used for nightly data uses a session key: ``` -Finally, the public `s3a://landsat-pds/` bucket is accessed anonymously: +Finally, the public `s3a://landsat-pds/` bucket can be accessed anonymously: ```xml @@ -953,7 +934,7 @@ Finally, the public `s3a://landsat-pds/` bucket is accessed anonymously: ``` -**Customizing S3A secrets held in credential files** +### Customizing S3A secrets held in credential files Although most properties are automatically propagated from their `fs.s3a.bucket.`-prefixed custom entry to that of the base `fs.s3a.` option @@ -967,7 +948,7 @@ then declare the path to the appropriate credential file in a bucket-specific version of the property `fs.s3a.security.credential.provider.path`. -### Using Per-Bucket Configuration to access data round the world +### Using Per-Bucket Configuration to access data round the world S3 Buckets are hosted in different "regions", the default being "US-East". The S3A client talks to this region by default, issing HTTP requests @@ -1073,7 +1054,6 @@ Here is a list of properties defining all AWS S3 regions, current as of June 201 ``` - This list can be used to specify the endpoint of individual buckets, for example for buckets in the central and EU/Ireland endpoints. @@ -1089,57 +1069,28 @@ for buckets in the central and EU/Ireland endpoints. ${ireland.endpoint} The endpoint for s3a://eu-dataset URLs - ``` Why explicitly declare a bucket bound to the central endpoint? It ensures that if the default endpoint is changed to a new region, data store in US-east is still reachable. +## How S3A writes data to S3 -### Stabilizing: S3A Fast Upload +The original S3A client implemented file writes by +buffering all data to disk as it was written to the `OutputStream`. +Only when the stream's `close()` method was called would the upload start. +This can made output slow, especially on large uploads, and could even +fill up the disk space of small (virtual) disks. -**New in Hadoop 2.7; significantly enhanced in Hadoop 2.8** +Hadoop 2.7 added the `S3AFastOutputStream` alternative, which Hadoop 2.8 expanded. +It is now considered stable and has replaced the original `S3AOutputStream`, +which is no longer shipped in hadoop. +The "fast" output stream -Because of the nature of the S3 object store, data written to an S3A `OutputStream` -is not written incrementally —instead, by default, it is buffered to disk -until the stream is closed in its `close()` method. - -This can make output slow: - -* The execution time for `OutputStream.close()` is proportional to the amount of data -buffered and inversely proportional to the bandwidth. That is `O(data/bandwidth)`. -* The bandwidth is that available from the host to S3: other work in the same -process, server or network at the time of upload may increase the upload time, -hence the duration of the `close()` call. -* If a process uploading data fails before `OutputStream.close()` is called, -all data is lost. -* The disks hosting temporary directories defined in `fs.s3a.buffer.dir` must -have the capacity to store the entire buffered file. - -Put succinctly: the further the process is from the S3 endpoint, or the smaller -the EC-hosted VM is, the longer it will take work to complete. 
- -This can create problems in application code: - -* Code often assumes that the `close()` call is fast; - the delays can create bottlenecks in operations. -* Very slow uploads sometimes cause applications to time out. (generally, -threads blocking during the upload stop reporting progress, so trigger timeouts) -* Streaming very large amounts of data may consume all disk space before the upload begins. - - -Work to addess this began in Hadoop 2.7 with the `S3AFastOutputStream` -[HADOOP-11183](https://issues.apache.org/jira/browse/HADOOP-11183), and -has continued with ` S3ABlockOutputStream` -[HADOOP-13560](https://issues.apache.org/jira/browse/HADOOP-13560). - - -This adds an alternative output stream, "S3a Fast Upload" which: - -1. Always uploads large files as blocks with the size set by +1. Uploads large files as blocks with the size set by `fs.s3a.multipart.size`. That is: the threshold at which multipart uploads begin and the size of each upload are identical. 1. Buffers blocks to disk (default) or in on-heap or off-heap memory. @@ -1154,34 +1105,19 @@ This adds an alternative output stream, "S3a Fast Upload" which: 1. Has the time to `close()` set by the amount of remaning data to upload, rather than the total size of the file. -With incremental writes of blocks, "S3A fast upload" offers an upload -time at least as fast as the "classic" mechanism, with significant benefits -on long-lived output streams, and when very large amounts of data are generated. -The in memory buffering mechanims may also offer speedup when running adjacent to +Because it starts uploading while data is still being written, it offers +significant benefits when very large amounts of data are generated. +The in memory buffering mechanims may also offer speedup when running adjacent to S3 endpoints, as disks are not used for intermediate data storage. ```xml - - fs.s3a.fast.upload - true - - Use the incremental block upload mechanism with - the buffering mechanism set in fs.s3a.fast.upload.buffer. - The number of threads performing uploads in the filesystem is defined - by fs.s3a.threads.max; the queue of waiting uploads limited by - fs.s3a.max.total.tasks. - The size of each buffer is set by fs.s3a.multipart.size. - - - fs.s3a.fast.upload.buffer disk - The buffering mechanism to use when using S3A fast upload - (fs.s3a.fast.upload=true). Values: disk, array, bytebuffer. - This configuration option has no effect if fs.s3a.fast.upload is false. + The buffering mechanism to use. + Values: disk, array, bytebuffer. "disk" will use the directories listed in fs.s3a.buffer.dir as the location(s) to save data prior to being uploaded. @@ -1235,26 +1171,19 @@ upload operation counts, so identifying when there is a backlog of work/ a mismatch between data generation rates and network bandwidth. Per-stream statistics can also be logged by calling `toString()` on the current stream. -* Incremental writes are not visible; the object can only be listed -or read when the multipart operation completes in the `close()` call, which -will block until the upload is completed. +* Files being written are still invisible untl the write +completes in the `close()` call, which will block until the upload is completed. -#### Fast Upload with Disk Buffers `fs.s3a.fast.upload.buffer=disk` +### Buffering upload data on disk `fs.s3a.fast.upload.buffer=disk` When `fs.s3a.fast.upload.buffer` is set to `disk`, all data is buffered to local hard disks prior to upload. 
This minimizes the amount of memory consumed, and so eliminates heap size as the limiting factor in queued uploads -—exactly as the original "direct to disk" buffering used when -`fs.s3a.fast.upload=false`. +—exactly as the original "direct to disk" buffering. ```xml - - fs.s3a.fast.upload - true - - fs.s3a.fast.upload.buffer disk @@ -1262,18 +1191,16 @@ consumed, and so eliminates heap size as the limiting factor in queued uploads fs.s3a.buffer.dir - - Comma separated list of temporary directories use for - storing blocks of data prior to their being uploaded to S3. - When unset, the Hadoop temporary directory hadoop.tmp.dir is used + ${hadoop.tmp.dir}/s3a + Comma separated list of directories that will be used to buffer file + uploads to. - ``` This is the default buffer mechanism. The amount of data which can be buffered is limited by the amount of available disk space. -#### Fast Upload with ByteBuffers: `fs.s3a.fast.upload.buffer=bytebuffer` +### Buffering upload data in ByteBuffers: `fs.s3a.fast.upload.buffer=bytebuffer` When `fs.s3a.fast.upload.buffer` is set to `bytebuffer`, all data is buffered in "Direct" ByteBuffers prior to upload. This *may* be faster than buffering to disk, @@ -1287,52 +1214,39 @@ the amount of memory requested for each container. The slower the upload bandwidth to S3, the greater the risk of running out of memory —and so the more care is needed in -[tuning the upload settings](#s3a_fast_upload_thread_tuning). +[tuning the upload settings](#upload_thread_tuning). ```xml - - fs.s3a.fast.upload - true - - fs.s3a.fast.upload.buffer bytebuffer ``` -#### Fast Upload with Arrays: `fs.s3a.fast.upload.buffer=array` +### Buffering upload data in byte arrays: `fs.s3a.fast.upload.buffer=array` When `fs.s3a.fast.upload.buffer` is set to `array`, all data is buffered in byte arrays in the JVM's heap prior to upload. This *may* be faster than buffering to disk. -This `array` option is similar to the in-memory-only stream offered in -Hadoop 2.7 with `fs.s3a.fast.upload=true` - The amount of data which can be buffered is limited by the available size of the JVM heap heap. The slower the write bandwidth to S3, the greater the risk of heap overflows. This risk can be mitigated by -[tuning the upload settings](#s3a_fast_upload_thread_tuning). +[tuning the upload settings](#upload_thread_tuning). ```xml - - fs.s3a.fast.upload - true - - fs.s3a.fast.upload.buffer array - ``` -#### S3A Fast Upload Thread Tuning -Both the [Array](#s3a_fast_upload_array) and [Byte buffer](#s3a_fast_upload_bytebuffer) +### Upload Thread Tuning + +Both the [Array](#upload_array) and [Byte buffer](#upload_bytebuffer) buffer mechanisms can consume very large amounts of memory, on-heap or -off-heap respectively. The [disk buffer](#s3a_fast_upload_disk) mechanism +off-heap respectively. The [disk buffer](#upload_disk) mechanism does not use much memory up, but will consume hard disk capacity. If there are many output streams being written to in a single process, the @@ -1419,14 +1333,12 @@ from VMs running on EC2. Number of seconds a thread can be idle before being terminated. - ``` - -#### Cleaning up After Incremental Upload Failures: `fs.s3a.multipart.purge` +### Cleaning up after partial Upload Failures: `fs.s3a.multipart.purge` -If an incremental streaming operation is interrupted, there may be +If an large stream writeoperation is interrupted, there may be intermediate partitions uploaded to S3 —data which will be billed for. 
These charges can be reduced by enabling `fs.s3a.multipart.purge`, @@ -1450,7 +1362,7 @@ older than this time. ``` -If an S3A client is instantited with `fs.s3a.multipart.purge=true`, +If an S3A client is instantiated with `fs.s3a.multipart.purge=true`, it will delete all out of date uploads *in the entire bucket*. That is: it will affect all multipart uploads to that bucket, from all applications. @@ -1461,15 +1373,13 @@ rate. The best practise for using this option is to disable multipart purges in normal use of S3A, enabling only in manual/scheduled housekeeping operations. -### S3A Experimental "fadvise" input policy support - -**Warning: EXPERIMENTAL: behavior may change in future** +### S3A "fadvise" input policy support The S3A Filesystem client supports the notion of input policies, similar to that of the Posix `fadvise()` API call. This tunes the behavior of the S3A client to optimise HTTP GET requests for the different use cases. -#### "sequential" (default) +*"sequential"* Read through the file, possibly with some short forward seeks. @@ -1481,11 +1391,11 @@ This is leads to maximum read throughput —but with very expensive backward seeks. -#### "normal" +*"normal" (default)* -This is currently the same as "sequential". +This is currently the same as "sequential", though it may evolve in future. -#### "random" +*"random"* Optimised for random IO, specifically the Hadoop `PositionedReadable` operations —though `seek(offset); read(byte_buffer)` also benefits. @@ -1534,627 +1444,13 @@ to set fadvise policies on input streams. Once implemented, this will become the supported mechanism used for configuring the input IO policy. -### Encrypting objects with S3A +## Other Topics -Currently, S3A only supports S3's Server Side Encryption for at rest data encryption. -It is *encouraged* to read up on the [AWS documentation](https://docs.aws.amazon.com/AmazonS3/latest/dev/serv-side-encryption.html) -for S3 Server Side Encryption before using these options as each behave differently -and the documentation will be more up to date on its behavior. When configuring -an encryption method in the `core-site.xml`, this will apply cluster wide. Any -new files written will be encrypted with this encryption configuration. Any -existing files when read, will decrypt using the existing method (if possible) -and will not be re-encrypted with the new method. It is also possible if mixing -multiple keys that the user does not have access to decrypt the object. It is -**NOT** advised to mix and match encryption types in a bucket, and is *strongly* -recommended to just one type and key per bucket. +### Copying Data with distcp -SSE-S3 is where S3 will manage the encryption keys for each object. The parameter -for `fs.s3a.server-side-encryption-algorithm` is `AES256`. +Hadoop's `distcp` application can be used to copy data between a Hadoop +cluster and Amazon S3. +See [Copying Data Between a Cluster and Amazon S3](https://hortonworks.github.io/hdp-aws/s3-copy-data/index.html) +for details on S3 copying specifically. -SSE-KMS is where the user specifies a Customer Master Key(CMK) that is used to -encrypt the objects. The user may specify a specific CMK or leave the -`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key -in AWS IAM. Each CMK configured in AWS IAM is region specific, and cannot be -used in a in a S3 bucket in a different region. There is can also be policies -assigned to the CMK that prohibit or restrict its use for users causing S3A -requests to fail. 
-SSE-C is where the user specifies an actual base64 encoded AES-256 key supplied -and managed by the user. - -#### SSE-C Warning - -It is strongly recommended to fully understand how SSE-C works in the S3 -environment before using this encryption type. Please refer to the Server Side -Encryption documentation available from AWS. SSE-C is only recommended for -advanced users with advanced encryption use cases. Failure to properly manage -encryption keys can cause data loss. Currently, the AWS S3 API(and thus S3A) -only supports one encryption key and cannot support decrypting objects during -moves under a previous key to a new destination. It is **NOT** advised to use -multiple encryption keys in a bucket, and is recommended to use one key per -bucket and to not change this key. This is due to when a request is made to S3, -the actual encryption key must be provided to decrypt the object and access the -metadata. Since only one encryption key can be provided at a time, S3A will not -pass the correct encryption key to decrypt the data. Please see the -troubleshooting section for more information. - - -## Troubleshooting S3A - -Common problems working with S3A are - -1. Classpath -1. Authentication -1. S3 Inconsistency side-effects - -Classpath is usually the first problem. For the S3x filesystem clients, -you need the Hadoop-specific filesystem clients, third party S3 client libraries -compatible with the Hadoop code, and any dependent libraries compatible with -Hadoop and the specific JVM. - -The classpath must be set up for the process talking to S3: if this is code -running in the Hadoop cluster, the JARs must be on that classpath. That -includes `distcp`. - - -### `ClassNotFoundException: org.apache.hadoop.fs.s3a.S3AFileSystem` - -(or `org.apache.hadoop.fs.s3native.NativeS3FileSystem`). - -These are the Hadoop classes, found in the `hadoop-aws` JAR. An exception -reporting one of these classes is missing means that this JAR is not on -the classpath. - -### `ClassNotFoundException: com.amazonaws.services.s3.AmazonS3Client` - -(or other `com.amazonaws` class.) - -This means that one or more of the `aws-*-sdk` JARs are missing. Add them. - -### Missing method in `com.amazonaws` class - -This can be triggered by incompatibilities between the AWS SDK on the classpath -and the version which Hadoop was compiled with. - -The AWS SDK JARs change their signature enough between releases that the only -way to safely update the AWS SDK version is to recompile Hadoop against the later -version. - -There's nothing the Hadoop team can do here: if you get this problem, then sorry, -but you are on your own. The Hadoop developer team did look at using reflection -to bind to the SDK, but there were too many changes between versions for this -to work reliably. All it did was postpone version compatibility problems until -the specific codepaths were executed at runtime —this was actually a backward -step in terms of fast detection of compatibility problems. - -### Missing method in a Jackson class - -This is usually caused by version mismatches between Jackson JARs on the -classpath. All Jackson JARs on the classpath *must* be of the same version. - - -### Authentication failure - -If Hadoop cannot authenticate with the S3 service endpoint, -the client retries a number of times before eventually failing. 
-When it finally gives up, it will report a message about signature mismatch: - -``` -com.amazonaws.services.s3.model.AmazonS3Exception: - The request signature we calculated does not match the signature you provided. - Check your key and signing method. - (Service: Amazon S3; Status Code: 403; Error Code: SignatureDoesNotMatch, -``` - -The likely cause is that you either have the wrong credentials or somehow -the credentials were not readable on the host attempting to read or write -the S3 Bucket. - -Enabling debug logging for the package `org.apache.hadoop.fs.s3a` -can help provide more information. - -The most common cause is that you have the wrong credentials for any of the current -authentication mechanism(s) —or somehow -the credentials were not readable on the host attempting to read or write -the S3 Bucket. However, there are a couple of system configuration problems -(JVM version, system clock) which also need to be checked. - -Most common: there's an error in the configuration properties. - - -1. Make sure that the name of the bucket is the correct one. -That is: check the URL. - -1. Make sure the property names are correct. For S3A, they are -`fs.s3a.access.key` and `fs.s3a.secret.key` —you cannot just copy the S3N -properties and replace `s3n` with `s3a`. - -1. Make sure the properties are visible to the process attempting to -talk to the object store. Placing them in `core-site.xml` is the standard -mechanism. - -1. If using session authentication, the session may have expired. -Generate a new session token and secret. - -1. If using environement variable-based authentication, make sure that the -relevant variables are set in the environment in which the process is running. - -The standard first step is: try to use the AWS command line tools with the same -credentials, through a command such as: - - hdfs fs -ls s3a://my-bucket/ - -Note the trailing "/" here; without that the shell thinks you are trying to list -your home directory under the bucket, which will only exist if explicitly created. - - -Attempting to list a bucket using inline credentials is a -means of verifying that the key and secret can access a bucket; - - hdfs fs -ls s3a://key:secret@my-bucket/ - -Do escape any `+` or `/` symbols in the secret, as discussed below, and never -share the URL, logs generated using it, or use such an inline authentication -mechanism in production. - -Finally, if you set the environment variables, you can take advantage of S3A's -support of environment-variable authentication by attempting the same ls operation. -That is: unset the `fs.s3a` secrets and rely on the environment variables. - -#### Authentication failure due to clock skew - -The timestamp is used in signing to S3, so as to -defend against replay attacks. If the system clock is too far behind *or ahead* -of Amazon's, requests will be rejected. - -This can surface as the situation where -read requests are allowed, but operations which write to the bucket are denied. - -Check the system clock. - -#### Authentication failure when using URLs with embedded secrets - -If using the (strongly discouraged) mechanism of including the -AWS Key and secret in a URL, then both "+" and "/" symbols need -to encoded in the URL. As many AWS secrets include these characters, -encoding problems are not uncommon. 
- -| symbol | encoded value| -|-----------|-------------| -| `+` | `%2B` | -| `/` | `%2F` | - - -As an example, a URL for `bucket` with AWS ID `user1` and secret `a+b/c` would -be represented as - -``` -s3a://user1:a%2Bb%2Fc@bucket/ -``` - -This technique is only needed when placing secrets in the URL. Again, -this is something users are strongly advised against using. - -#### Authentication Failures When Running on Java 8u60+ - -A change in the Java 8 JVM broke some of the `toString()` string generation -of Joda Time 2.8.0, which stopped the Amazon S3 client from being able to -generate authentication headers suitable for validation by S3. - -**Fix**: Make sure that the version of Joda Time is 2.8.1 or later, or -use a new version of Java 8. - - -### "Bad Request" exception when working with AWS S3 Frankfurt, Seoul, or other "V4" endpoint - - -S3 Frankfurt and Seoul *only* support -[the V4 authentication API](http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html). - -Requests using the V2 API will be rejected with 400 `Bad Request` - -``` -$ bin/hadoop fs -ls s3a://frankfurt/ -WARN s3a.S3AFileSystem: Client: Amazon S3 error 400: 400 Bad Request; Bad Request (retryable) - -com.amazonaws.services.s3.model.AmazonS3Exception: Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; Request ID: 923C5D9E75E44C06), S3 Extended Request ID: HDwje6k+ANEeDsM6aJ8+D5gUmNAMguOk2BvZ8PH3g9z0gpH+IuwT7N19oQOnIr5CIx7Vqb/uThE= - at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) - at com.amazonaws.services.s3.AmazonS3Client.headBucket(AmazonS3Client.java:1107) - at com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:1070) - at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:307) - at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:284) - at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2793) - at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:101) - at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2830) - at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2812) - at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:389) - at org.apache.hadoop.fs.Path.getFileSystem(Path.java:356) - at org.apache.hadoop.fs.shell.PathData.expandAsGlob(PathData.java:325) - at org.apache.hadoop.fs.shell.Command.expandArgument(Command.java:235) - at org.apache.hadoop.fs.shell.Command.expandArguments(Command.java:218) - at org.apache.hadoop.fs.shell.FsCommand.processRawArguments(FsCommand.java:103) - at org.apache.hadoop.fs.shell.Command.run(Command.java:165) - at org.apache.hadoop.fs.FsShell.run(FsShell.java:315) - at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76) - at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:90) - at org.apache.hadoop.fs.FsShell.main(FsShell.java:373) -ls: doesBucketExist on frankfurt-new: com.amazonaws.services.s3.model.AmazonS3Exception: - Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; -``` - -This happens when trying to work with any S3 service which only supports the -"V4" signing API —but the client is configured 
to use the default S3A service -endpoint. - -The S3A client needs to be given the endpoint to use via the `fs.s3a.endpoint` -property. - -As an example, the endpoint for S3 Frankfurt is `s3.eu-central-1.amazonaws.com`: - -```xml - - fs.s3a.endpoint - s3.eu-central-1.amazonaws.com - -``` - -### Error message "The bucket you are attempting to access must be addressed using the specified endpoint" - -This surfaces when `fs.s3a.endpoint` is configured to use an S3 service endpoint -which is neither the original AWS one, `s3.amazonaws.com` , nor the one where -the bucket is hosted. The error message contains the redirect target returned -by S3, which can be used to determine the correct value for `fs.s3a.endpoint`. - -``` -org.apache.hadoop.fs.s3a.AWSS3IOException: Received permanent redirect response - to bucket.s3-us-west-2.amazonaws.com. This likely indicates that the S3 - endpoint configured in fs.s3a.endpoint does not match the AWS region - containing the bucket.: The bucket you are attempting to access must be - addressed using the specified endpoint. Please send all future requests to - this endpoint. (Service: Amazon S3; Status Code: 301; - Error Code: PermanentRedirect; Request ID: 7D39EC1021C61B11) - at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:132) - at org.apache.hadoop.fs.s3a.S3AFileSystem.initMultipartUploads(S3AFileSystem.java:287) - at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:203) - at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2895) - at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:102) - at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2932) - at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2914) - at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:390) -``` - -1. Use the [Specific endpoint of the bucket's S3 service](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region) -1. If not using "V4" authentication (see above), the original S3 endpoint -can be used: - -```xml - - fs.s3a.endpoint - s3.amazonaws.com - -``` - -Using the explicit endpoint for the region is recommended for speed and -to use the V4 signing API. - - -### "Timeout waiting for connection from pool" when writing to S3A - -This happens when using the Block output stream, `fs.s3a.fast.upload=true` and -the thread pool runs out of capacity. 
- -``` -[s3a-transfer-shared-pool1-t20] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) - Unable to execute HTTP request: Timeout waiting for connection from poolorg.apache.http.conn.ConnectionPoolTimeoutException: Timeout waiting for connection from pool - at org.apache.http.impl.conn.PoolingClientConnectionManager.leaseConnection(PoolingClientConnectionManager.java:230) - at org.apache.http.impl.conn.PoolingClientConnectionManager$1.getConnection(PoolingClientConnectionManager.java:199) - at sun.reflect.GeneratedMethodAccessor13.invoke(Unknown Source) - at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) - at java.lang.reflect.Method.invoke(Method.java:498) - at com.amazonaws.http.conn.ClientConnectionRequestFactory$Handler.invoke(ClientConnectionRequestFactory.java:70) - at com.amazonaws.http.conn.$Proxy10.getConnection(Unknown Source) - at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:424) - at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884) - at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) - at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) - at com.amazonaws.services.s3.AmazonS3Client.doUploadPart(AmazonS3Client.java:2921) - at com.amazonaws.services.s3.AmazonS3Client.uploadPart(AmazonS3Client.java:2906) - at org.apache.hadoop.fs.s3a.S3AFileSystem.uploadPart(S3AFileSystem.java:1025) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload$1.call(S3ABlockOutputStream.java:360) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload$1.call(S3ABlockOutputStream.java:355) - at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) - at java.util.concurrent.FutureTask.run(FutureTask.java:266) - at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) - at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) - at java.lang.Thread.run(Thread.java:745) -``` - -Make sure that `fs.s3a.connection.maximum` is at least larger -than `fs.s3a.threads.max`. - -```xml - - fs.s3a.threads.max - 20 - - - - fs.s3a.connection.maximum - 30 - -``` - -### "Timeout waiting for connection from pool" when reading from S3A - -This happens when more threads are trying to read from an S3A system than -the maximum number of allocated HTTP connections. - -Set `fs.s3a.connection.maximum` to a larger value (and at least as large as -`fs.s3a.threads.max`) - -### Out of heap memory when writing to S3A via Fast Upload - -This can happen when using the fast upload mechanism (`fs.s3a.fast.upload=true`) -and in-memory buffering (either `fs.s3a.fast.upload.buffer=array` or -`fs.s3a.fast.upload.buffer=bytebuffer`). - -More data is being generated than in the JVM than it can upload to S3 —and -so much data has been buffered that the JVM has run out of memory. - -Consult [S3A Fast Upload Thread Tuning](#s3a_fast_upload_thread_tuning) for -detail on this issue and options to address it. Consider also buffering to -disk, rather than memory. 
- - -### When writing to S3A: "java.io.FileNotFoundException: Completing multi-part upload" - - -``` -java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b: com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist. The upload ID may be invalid, or the upload may have been aborted or completed. (Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload; Request ID: 84FF8057174D9369), S3 Extended Request ID: Ij5Yn6Eq/qIERH4Z6Io3YL2t9/qNZ7z9gjPb1FrTtTovZ8k1MXqh+zCYYjqmfJ/fCY6E1+JR9jA= - at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) - at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272) - at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) - at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) -``` - -This surfaces if, while a multipart upload was taking place, all outstanding multipart -uploads were garbage collected. The upload operation cannot complete because -the data uploaded has been deleted. - -Consult [Cleaning up After Incremental Upload Failures](#s3a_multipart_purge) for -details on how the multipart purge timeout can be set. If multipart uploads -are failing with the message above, it may be a sign that this value is too low. - -### `MultiObjectDeleteException` during delete or rename of files - -``` -Exception in thread "main" com.amazonaws.services.s3.model.MultiObjectDeleteException: - Status Code: 0, AWS Service: null, AWS Request ID: null, AWS Error Code: null, - AWS Error Message: One or more objects could not be deleted, S3 Extended Request ID: null - at com.amazonaws.services.s3.AmazonS3Client.deleteObjects(AmazonS3Client.java:1745) -``` -This happens when trying to delete multiple objects, and one of the objects -could not be deleted. It *should not occur* just because the object is missing. -More specifically: at the time this document was written, we could not create -such a failure. - -It will occur if the caller lacks the permission to delete any of the objects. - -Consult the log to see the specifics of which objects could not be deleted. -Do you have permission to do so? - -If this operation is failing for reasons other than the caller lacking -permissions: - -1. Try setting `fs.s3a.multiobjectdelete.enable` to `false`. -1. Consult [HADOOP-11572](https://issues.apache.org/jira/browse/HADOOP-11572) -for up to date advice. 
- -### When writing to S3A, HTTP Exceptions logged at info from `AmazonHttpClient` - -``` -[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) - Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond -org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond - at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143) - at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57) - at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261) - at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283) - at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259) - at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209) - at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272) - at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66) - at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124) - at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686) - at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488) - at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884) - at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) - at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) - at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731) - at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41) - at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28) - at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) - at java.util.concurrent.FutureTask.run(FutureTask.java:266) - at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) - at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) - at java.lang.Thread.run(Thread.java:745) -``` - -These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client -will attempt to retry the operation; it may just be a transient event. If there -are many such exceptions in logs, it may be a symptom of connectivity or network -problems. - -### Visible S3 Inconsistency - -Amazon S3 is *an eventually consistent object store*. That is: not a filesystem. - -It offers read-after-create consistency: a newly created file is immediately -visible. Except, there is a small quirk: a negative GET may be cached, such -that even if an object is immediately created, the fact that there "wasn't" -an object is still remembered. 
- -That means the following sequence on its own will be consistent -``` -touch(path) -> getFileStatus(path) -``` - -But this sequence *may* be inconsistent. - -``` -getFileStatus(path) -> touch(path) -> getFileStatus(path) -``` - -A common source of visible inconsistencies is that the S3 metadata -database —the part of S3 which serves list requests— is updated asynchronously. -Newly added or deleted files may not be visible in the index, even though direct -operations on the object (`HEAD` and `GET`) succeed. - -In S3A, that means the `getFileStatus()` and `open()` operations are more likely -to be consistent with the state of the object store than any directory list -operations (`listStatus()`, `listFiles()`, `listLocatedStatus()`, -`listStatusIterator()`). - - -### `FileNotFoundException` even though the file was just written. - -This can be a sign of consistency problems. It may also surface if there is some -asynchronous file write operation still in progress in the client: the operation -has returned, but the write has not yet completed. While the S3A client code -does block during the `close()` operation, we suspect that asynchronous writes -may be taking place somewhere in the stack —this could explain why parallel tests -fail more often than serialized tests. - -### File not found in a directory listing, even though `getFileStatus()` finds it - -(Similarly: deleted file found in listing, though `getFileStatus()` reports -that it is not there) - -This is a visible sign of updates to the metadata server lagging -behind the state of the underlying filesystem. - - -### File not visible/saved - -The files in an object store are not visible until the write has been completed. -In-progress writes are simply saved to a local file/cached in RAM and only uploaded. -at the end of a write operation. If a process terminated unexpectedly, or failed -to call the `close()` method on an output stream, the pending data will have -been lost. - -### File `flush()` and `hflush()` calls do not save data to S3A - -Again, this is due to the fact that the data is cached locally until the -`close()` operation. The S3A filesystem cannot be used as a store of data -if it is required that the data is persisted durably after every -`flush()/hflush()` call. This includes resilient logging, HBase-style journalling -and the like. The standard strategy here is to save to HDFS and then copy to S3. - - -### S3 Server Side Encryption - -#### Using SSE-KMS - -When performing file operations, the user may run into an issue where the KMS -key arn is invalid. -``` -com.amazonaws.services.s3.model.AmazonS3Exception: -Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException; Request ID: 708284CF60EE233F), -S3 Extended Request ID: iHUUtXUSiNz4kv3Bdk/hf9F+wjPt8GIVvBHx/HEfCBYkn7W6zmpvbA3XT7Y5nTzcZtfuhcqDunw=: -Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException; Request ID: 708284CF60EE233F) -``` - -This is due to either, the KMS key id is entered incorrectly, or the KMS key id -is in a different region than the S3 bucket being used. 
- -#### Using SSE-C -When performing file operations the user may run into an unexpected 400/403 -error such as -``` -org.apache.hadoop.fs.s3a.AWSS3IOException: getFileStatus on fork-4/: com.amazonaws.services.s3.model.AmazonS3Exception: -Bad Request (Service: Amazon S3; Status Code: 400; -Error Code: 400 Bad Request; Request ID: 42F9A1987CB49A99), -S3 Extended Request ID: jU2kcwaXnWj5APB14Cgb1IKkc449gu2+dhIsW/+7x9J4D+VUkKvu78mBo03oh9jnOT2eoTLdECU=: -Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; Request ID: 42F9A1987CB49A99) -``` - -This can happen in the cases of not specifying the correct SSE-C encryption key. -Such cases can be as follows: -1. An object is encrypted using SSE-C on S3 and either the wrong encryption type -is used, no encryption is specified, or the SSE-C specified is incorrect. -2. A directory is encrypted with a SSE-C keyA and the user is trying to move a -file using configured SSE-C keyB into that structure. - -### Other issues - -*Performance slow* - -S3 is slower to read data than HDFS, even on virtual clusters running on -Amazon EC2. - -* HDFS replicates data for faster query performance -* HDFS stores the data on the local hard disks, avoiding network traffic - if the code can be executed on that host. As EC2 hosts often have their - network bandwidth throttled, this can make a tangible difference. -* HDFS is significantly faster for many "metadata" operations: listing -the contents of a directory, calling `getFileStatus()` on path, -creating or deleting directories. -* On HDFS, Directory renames and deletes are `O(1)` operations. On -S3 renaming is a very expensive `O(data)` operation which may fail partway through -in which case the final state depends on where the copy+ delete sequence was when it failed. -All the objects are copied, then the original set of objects are deleted, so -a failure should not lose data —it may result in duplicate datasets. -* Because the write only begins on a `close()` operation, it may be in the final -phase of a process where the write starts —this can take so long that some things -can actually time out. -* File IO performing many seek calls/positioned read calls will encounter -performance problems due to the size of the HTTP requests made. On S3a, -the (experimental) fadvise policy "random" can be set to alleviate this at the -expense of sequential read performance and bandwidth. - -The slow performance of `rename()` surfaces during the commit phase of work, -including - -* The MapReduce `FileOutputCommitter`. -* DistCp's rename-after-copy operation. -* The `hdfs fs -rm` command renaming the file under `.Trash` rather than -deleting it. Use `-skipTrash` to eliminate that step. - -These operations can be significantly slower when S3 is the destination -compared to HDFS or other "real" filesystem. - -*Improving S3 load-balancing behavior* - -Amazon S3 uses a set of front-end servers to provide access to the underlying data. -The choice of which front-end server to use is handled via load-balancing DNS -service: when the IP address of an S3 bucket is looked up, the choice of which -IP address to return to the client is made based on the the current load -of the front-end servers. - -Over time, the load across the front-end changes, so those servers considered -"lightly loaded" will change. If the DNS value is cached for any length of time, -your application may end up talking to an overloaded server. Or, in the case -of failures, trying to talk to a server that is no longer there. 
- -And by default, for historical security reasons in the era of applets, -the DNS TTL of a JVM is "infinity". - -To work with AWS better, set the DNS time-to-live of an application which -works with S3 to something lower. See [AWS documentation](http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/java-dg-jvm-ttl.html). diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index fe67d6954f2..79571227a3b 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -107,7 +107,6 @@ It is recommended that you leave the default setting here: fs.s3a.metadatastore.authoritative false - ``` Setting this to `true` is currently an experimental feature. When true, the @@ -510,7 +509,6 @@ log4j.logger.com.amazonaws.services.dynamodbv2.AmazonDynamoDB # Log all HTTP requests made; includes S3 interaction. This may # include sensitive information such as account IDs in HTTP headers. log4j.logger.com.amazonaws.request=DEBUG - ``` If all else fails, S3Guard is designed to allow for easy recovery by deleting @@ -538,7 +536,7 @@ S3Guard tables are created with a version marker, an entry with the primary key and child entry of `../VERSION`; the use of a relative path guarantees that it will not be resolved. -#### Versioning policy. +*Versioning policy* 1. The version number of an S3Guard table will only be incremented when an incompatible change is made to the table structure —that is, the structure @@ -557,7 +555,7 @@ in an incompatible manner. The version marker in tables exists to support such an option if it ever becomes necessary, by ensuring that all S3Guard client can recognise any version mismatch. -### Security +## Security All users of the DynamoDB table must have write access to it. This effectively means they must have write access to the entire object store. @@ -569,9 +567,9 @@ are only made after successful file creation, deletion and rename, the store is *unlikely* to get out of sync, it is still something which merits more testing before it could be considered reliable. -### Troubleshooting +## Troubleshooting -#### Error: `S3Guard table lacks version marker.` +### Error: `S3Guard table lacks version marker.` The table which was intended to be used as a S3guard metadata store does not have any version marker indicating that it is a S3Guard table. @@ -581,7 +579,7 @@ It may be that this is not a S3Guard table. * Make sure that this is the correct table name. * Delete the table, so it can be rebuilt. -#### Error: `Database table is from an incompatible S3Guard version` +### Error: `Database table is from an incompatible S3Guard version` This indicates that the version of S3Guard which created (or possibly updated) the database table is from a different version that that expected by the S3A @@ -596,7 +594,7 @@ bucket. Upgrade the application/library. If the expected version is higher than the actual version, then the table itself will need upgrading. -#### Error `"DynamoDB table TABLE does not exist in region REGION; auto-creation is turned off"` +### Error `"DynamoDB table TABLE does not exist in region REGION; auto-creation is turned off"` S3Guard could not find the DynamoDB table for the Metadata Store, and it was not configured to create it. Either the table was missing, @@ -608,3 +606,8 @@ or the configuration is preventing S3Guard from finding the table. 1. 
If the region is not set, verify that the table exists in the same region as the bucket being used. 1. Create the table if necessary. + + +## Other Topis + +For details on how to test S3Guard, see [Testing S3Guard](./testing.html#s3guard) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3n.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3n.md new file mode 100644 index 00000000000..9b59ad1d382 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3n.md @@ -0,0 +1,52 @@ + + +# The S3N Client + + + +S3N was a Hadoop filesystem client which can read or write data stored +in Amazon S3. It uses URLs with the schema `s3n://`. + +- - - + +**Hadoop's S3N client for Amazon S3 has been superceded by +the S3A connector** + +**Please upgrade to S3A for a supported, higher-performance S3 Client** + +- - - + + +## How to migrate to to the S3A client + +1. Keep the `hadoop-aws` JAR on your classpath. + +1. Add the `aws-java-sdk-bundle.jar` JAR which Hadoop ships +with to your classpath. + +1. Change the authentication keys + + | old key | new key | + |---------|---------| + | `fs.s3n.awsAccessKeyId` | `fs.s3a.access.key` | + | `fs.s3n.awsSecretAccessKey` | `fs.s3a.secret.key` | + + Do make sure the property names are correct. For S3A, they are + `fs.s3a.access.key` and `fs.s3a.secret.key` —you cannot just copy the S3N + properties and replace `s3n` with `s3a`. + +1. Replace URLs which began with `s3n://` with `s3a://` + +1. You may now remove the `jets3t` JAR, as it is no longer needed. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index 19d322db877..cf7a2e43375 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -12,9 +12,9 @@ limitations under the License. See accompanying LICENSE file. --> -# Testing the S3 filesystem clients +# Testing the S3A filesystem client and its features, including S3Guard - + This module includes both unit tests, which can run in isolation without connecting to the S3 service, and integration tests, which require a working @@ -26,7 +26,7 @@ Due to eventual consistency, integration tests may fail without reason. Transient failures, which no longer occur upon rerunning the test, should thus be ignored. -## Policy for submitting patches which affect the `hadoop-aws` module. +## Policy for submitting patches which affect the `hadoop-aws` module. The Apache Jenkins infrastucture does not run any S3 integration tests, due to the need to keep credentials secure. @@ -74,7 +74,7 @@ in the production code, that could be a sign of a problem which may surface over long-haul connections. Please help us identify and fix these problems — especially as you are the one best placed to verify the fixes work. -## Setting up the tests +## Setting up the tests To integration test the S3* filesystem clients, you need to provide `auth-keys.xml` which passes in authentication details to the test runner. @@ -101,16 +101,11 @@ The XML file must contain all the ID/key information needed to connect each of the filesystem clients to the object stores, and a URL for each filesystem for its testing. -1. `test.fs.s3n.name` : the URL of the bucket for S3n tests 1. `test.fs.s3a.name` : the URL of the bucket for S3a tests -1. `fs.contract.test.fs.s3n` : the URL of the bucket for S3n filesystem contract tests 1. 
`fs.contract.test.fs.s3a` : the URL of the bucket for S3a filesystem contract tests -*Note* that running s3a and s3n tests in parallel mode, against the same bucket -is unreliable. We recommend using separate buckets or testing one connector -at a time. -The contents of each bucket will be destroyed during the test process: +The contents of the bucket will be destroyed during the test process: do not use the bucket for any purpose other than testing. Furthermore, for s3a, all in-progress multi-part uploads to the bucket will be aborted at the start of a test (by forcing `fs.s3a.multipart.purge=true`) to clean up the @@ -121,26 +116,6 @@ Example: ```xml - - test.fs.s3n.name - s3n://test-aws-s3n/ - - - - fs.contract.test.fs.s3n - ${test.fs.s3n.name} - - - - fs.s3n.awsAccessKeyId - DONOTPCOMMITTHISKEYTOSCM - - - - fs.s3n.awsSecretAccessKey - DONOTEVERSHARETHISSECRETKEY! - - test.fs.s3a.name s3a://test-aws-s3a/ @@ -172,7 +147,7 @@ Example: ``` -### Configuring S3a Encryption +### Configuring S3a Encryption For S3a encryption tests to run correctly, the `fs.s3a.server-side-encryption.key` must be configured in the s3a contract xml @@ -192,7 +167,7 @@ You can also force all the tests to run with a specific SSE encryption method by configuring the property `fs.s3a.server-side-encryption-algorithm` in the s3a contract file. -## Running the Tests +## Running the Tests After completing the configuration, execute the test run through Maven. @@ -251,7 +226,7 @@ combination with `test` or `it.test`. If you know that you are specifying only tests that can run safely in parallel, then it will work. For wide patterns, like `ITestS3A*` shown above, it may cause unpredictable test failures. -### Testing against different regions +### Testing against different regions S3A can connect to different regions —the tests support this. Simply define the target region in `auth-keys.xml`. @@ -265,7 +240,7 @@ define the target region in `auth-keys.xml`. This is used for all tests expect for scale tests using a Public CSV.gz file (see below) -### CSV Data source Tests +### CSV Data Tests The `TestS3AInputStreamPerformance` tests require read access to a multi-MB text file. The default file for these tests is one published by amazon, @@ -303,7 +278,7 @@ For the default test dataset, hosted in the `landsat-pds` bucket, this is: ``` -### Viewing Integration Test Reports +## Viewing Integration Test Reports Integration test results and logs are stored in `target/failsafe-reports/`. @@ -313,7 +288,7 @@ plugin: ```bash mvn surefire-report:failsafe-report-only ``` -### Scale Tests +## Scale Tests There are a set of tests designed to measure the scalability and performance at scale of the S3A tests, *Scale Tests*. Tests include: creating @@ -325,7 +300,7 @@ By their very nature they are slow. And, as their execution time is often limited by bandwidth between the computer running the tests and the S3 endpoint, parallel execution does not speed these tests up. -#### Enabling the Scale Tests +### Enabling the Scale Tests The tests are enabled if the `scale` property is set in the maven build this can be done regardless of whether or not the parallel test profile @@ -342,7 +317,7 @@ sequentially; those which are slow due to HTTPS setup costs or server-side actionsare included in the set of parallelized tests. -#### Maven build tuning options +### Tuning scale optins from Maven Some of the tests can be tuned from the maven build or from the @@ -373,7 +348,7 @@ Only a few properties can be set this way; more will be added. 
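+
+As a sketch, a tunable value such as `fs.s3a.scale.test.huge.filesize`
+(described below) can typically be declared in the test configuration file,
+or passed to Maven as a `-D` definition; the 256M value here is purely
+illustrative:
+
+```xml
+<property>
+  <name>fs.s3a.scale.test.huge.filesize</name>
+  <value>256M</value>
+</property>
+```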
The file and partition sizes are numeric values with a k/m/g/t/p suffix depending on the desired size. For example: 128M, 128m, 2G, 2G, 4T or even 1P. -#### Scale test configuration options +### Scale test configuration options Some scale tests perform multiple operations (such as creating many directories). @@ -418,7 +393,7 @@ smaller to achieve faster test runs. S3A specific scale test properties are -##### `fs.s3a.scale.test.huge.filesize`: size in MB for "Huge file tests". +*`fs.s3a.scale.test.huge.filesize`: size in MB for "Huge file tests".* The Huge File tests validate S3A's ability to handle large files —the property `fs.s3a.scale.test.huge.filesize` declares the file size to use. @@ -452,13 +427,11 @@ Otherwise, set a large timeout in `fs.s3a.scale.test.timeout` ``` - The tests are executed in an order to only clean up created files after the end of all the tests. If the tests are interrupted, the test data will remain. - -## Testing against non AWS S3 endpoints. +## Testing against non AWS S3 endpoints. The S3A filesystem is designed to work with storage endpoints which implement the S3 protocols to the extent that the amazon S3 SDK is capable of talking @@ -527,7 +500,7 @@ An alternate endpoint may be defined in `test.fs.s3a.sts.endpoint`. The default is ""; meaning "use the amazon default value". -## Debugging Test failures +## Debugging Test failures Logging at debug level is the standard way to provide more diagnostics output; after setting this rerun the tests @@ -550,7 +523,7 @@ setting the `fs.s3a.user.agent.prefix` to a unique prefix for a specific test run, which will enable the specific log entries to be more easily located. -## Adding new tests +## Adding new tests New tests are always welcome. Bear in mind that we need to keep costs and test time down, which is done by @@ -593,7 +566,7 @@ fail with meaningful diagnostics, so any new problems can be easily debugged from test logs. -### Requirements of new Tests +## Requirements of new Tests This is what we expect from new tests; they're an extension of the normal @@ -602,7 +575,7 @@ use requires the presence of secret credentials, where tests may be slow, and where finding out why something failed from nothing but the test output is critical. -#### Subclasses Existing Shared Base Classes +### Subclasses Existing Shared Base Classes Extend `AbstractS3ATestBase` or `AbstractSTestS3AHugeFiles` unless justifiable. These set things up for testing against the object stores, provide good threadnames, @@ -619,12 +592,12 @@ defined in `fs.s3a.contract.test` Having shared base classes may help reduce future maintenance too. Please use them/ -#### Secure +### Secure Don't ever log credentials. The credential tests go out of their way to not provide meaningful logs or assertion messages precisely to avoid this. -#### Efficient of Time and Money +### Efficient of Time and Money This means efficient in test setup/teardown, and, ideally, making use of existing public datasets to save setup time and tester cost. @@ -650,7 +623,7 @@ against other regions, or with third party S3 implementations. Thus the URL can be overridden for testing elsewhere. -#### Works With Other S3 Endpoints +### Works With Other S3 Endpoints Don't assume AWS S3 US-East only, do allow for working with external S3 implementations. Those may be behind the latest S3 API features, not support encryption, session @@ -678,7 +651,7 @@ adds some newlines so as to be easier to spot. 1. 
Use `ContractTestUtils.NanoTimer` to measure the duration of operations, and log the output. -#### Fails Meaningfully +### Fails Meaningfully The `ContractTestUtils` class contains a whole set of assertions for making statements about the expected state of a filesystem, e.g. @@ -705,7 +678,7 @@ get called. We really appreciate this — you will too. -## Tips +## Tips ### How to keep your credentials really safe @@ -725,7 +698,7 @@ using an absolute XInclude reference to it. ``` -# Failure Injection +# Failure Injection **Warning do not enable any type of failure injection in production. The following settings are for testing only.** @@ -858,7 +831,10 @@ The inconsistent client is shipped in the `hadoop-aws` JAR, so it can be used in applications which work with S3 to see how they handle inconsistent directory listings. -## Testing S3Guard +## Testing S3Guard + +[S3Guard](./s3guard.html) is an extension to S3A which adds consistent metadata +listings to the S3A client. As it is part of S3A, it also needs to be tested. The basic strategy for testing S3Guard correctness consists of: @@ -934,13 +910,6 @@ If the `s3guard` profile *is* set, overwrite any previously set in the configuration files. 1. DynamoDB will be configured to create any missing tables. -### Warning About Concurrent Tests - -You must not run S3A and S3N tests in parallel on the same bucket. This is -especially true when S3Guard is enabled. S3Guard requires that all clients -that are modifying the bucket have S3Guard enabled, so having S3N -integration tests running in parallel with S3A tests will cause strange -failures. ### Scale Testing MetadataStore Directly diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index d79720e76fc..619ffc15dff 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -12,27 +12,628 @@ limitations under the License. See accompanying LICENSE file. --> -# Troubleshooting S3A +# Troubleshooting -Here are some lower level details and hints on troubleshooting and tuning -the S3A client. + -## Logging at lower levels +## Introduction -The AWS SDK and the Apache HTTP components can be configured to log at -more detail, as can S3A itself. +Common problems working with S3 are -```properties -log4j.logger.org.apache.hadoop.fs.s3a=DEBUG -log4j.logger.com.amazonaws.request=DEBUG -log4j.logger.org.apache.http=DEBUG -log4j.logger.org.apache.http.wire=ERROR +1. Classpath setup +1. Authentication +1. S3 Inconsistency side-effects + +Classpath is usually the first problem. For the S3x filesystem clients, +you need the Hadoop-specific filesystem clients, third party S3 client libraries +compatible with the Hadoop code, and any dependent libraries compatible with +Hadoop and the specific JVM. + +The classpath must be set up for the process talking to S3: if this is code +running in the Hadoop cluster, the JARs must be on that classpath. That +includes `distcp` and the `hadoop fs` command. + + + +## Classpath Setup + +Note that for security reasons, the S3A client does not provide much detail +on the authentication process (i.e. the secrets used to authenticate). + +### `ClassNotFoundException: org.apache.hadoop.fs.s3a.S3AFileSystem` + +These is Hadoop filesytem client classes, found in the `hadoop-aws` JAR. 
+An exception reporting this class as missing means that this JAR is not on +the classpath. + +### `ClassNotFoundException: com.amazonaws.services.s3.AmazonS3Client` + +(or other `com.amazonaws` class.) + +This means that the `aws-java-sdk-bundle.jar` JAR is not on the classpath: +add it. + +### Missing method in `com.amazonaws` class + +This can be triggered by incompatibilities between the AWS SDK on the classpath +and the version which Hadoop was compiled with. + +The AWS SDK JARs change their signature enough between releases that the only +way to safely update the AWS SDK version is to recompile Hadoop against the later +version. + +The sole fix is to use the same version of the AWS SDK with which Hadoop +was built. + + +## Authentication Failure + +If Hadoop cannot authenticate with the S3 service endpoint, +the client retries a number of times before eventually failing. +When it finally gives up, it will report a message about signature mismatch: + +``` +com.amazonaws.services.s3.model.AmazonS3Exception: + The request signature we calculated does not match the signature you provided. + Check your key and signing method. + (Service: Amazon S3; Status Code: 403; Error Code: SignatureDoesNotMatch, ``` -Be aware that logging HTTP headers may leak sensitive AWS account information, -so should not be shared. +The likely cause is that you either have the wrong credentials or somehow +the credentials were not readable on the host attempting to read or write +the S3 Bucket. -## Advanced: network performance +Enabling debug logging for the package `org.apache.hadoop.fs.s3a` +can help provide more information. + +The most common cause is that you have the wrong credentials for any of the current +authentication mechanism(s) —or somehow +the credentials were not readable on the host attempting to read or write +the S3 Bucket. However, there are a couple of system configuration problems +(JVM version, system clock) which also need to be checked. + +Most common: there's an error in the configuration properties. + +1. Make sure that the name of the bucket is the correct one. +That is: check the URL. + +1. If using a private S3 server, make sure endpoint in `fs.s3a.endpoint` has +been set to this server -and that the client is not accidentally trying to +authenticate with the public Amazon S3 service. + +1. Make sure the property names are correct. For S3A, they are +`fs.s3a.access.key` and `fs.s3a.secret.key` —you cannot just copy the S3N +properties and replace `s3n` with `s3a`. + +1. Make sure the properties are visible to the process attempting to +talk to the object store. Placing them in `core-site.xml` is the standard +mechanism. + +1. If using session authentication, the session may have expired. +Generate a new session token and secret. + +1. If using environement variable-based authentication, make sure that the +relevant variables are set in the environment in which the process is running. + +The standard first step is: try to use the AWS command line tools with the same +credentials, through a command such as: + + hadoop fs -ls s3a://my-bucket/ + +Note the trailing "/" here; without that the shell thinks you are trying to list +your home directory under the bucket, which will only exist if explicitly created. 
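+
+Before moving on, it can also help to review the `core-site.xml` entries themselves.
+The following is a minimal sketch only: the property names are the ones listed above,
+and the values are placeholders rather than real credentials:
+
+```xml
+<property>
+  <name>fs.s3a.access.key</name>
+  <value>YOUR_ACCESS_KEY_ID</value>
+</property>
+
+<property>
+  <name>fs.s3a.secret.key</name>
+  <value>YOUR_SECRET_ACCESS_KEY</value>
+</property>
+```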
+
+
+Attempting to list a bucket using inline credentials is a
+means of verifying that the key and secret can access a bucket:
+
+    hadoop fs -ls s3a://key:secret@my-bucket/
+
+Do escape any `+` or `/` symbols in the secret, as discussed below, and never
+share the URL, logs generated using it, or use such an inline authentication
+mechanism in production.
+
+Finally, if you set the environment variables, you can take advantage of S3A's
+support of environment-variable authentication by attempting the same ls operation.
+That is: unset the `fs.s3a` secrets and rely on the environment variables.
+
+### Authentication failure due to clock skew
+
+The timestamp is used in signing to S3, so as to
+defend against replay attacks. If the system clock is too far behind *or ahead*
+of Amazon's, requests will be rejected.
+
+This can surface as the situation where
+read requests are allowed, but operations which write to the bucket are denied.
+
+Check the system clock.
+
+### Authentication failure when using URLs with embedded secrets
+
+If using the (strongly discouraged) mechanism of including the
+AWS key and secret in a URL, then both "+" and "/" symbols need
+to be encoded in the URL. As many AWS secrets include these characters,
+encoding problems are not uncommon.
+
+| symbol | encoded value |
+|-----------|-------------|
+| `+` | `%2B` |
+| `/` | `%2F` |
+
+
+As an example, a URL for `bucket` with AWS ID `user1` and secret `a+b/c` would
+be represented as
+
+```
+s3a://user1:a%2Bb%2Fc@bucket/
+```
+
+This technique is only needed when placing secrets in the URL. Again,
+this is something users are strongly advised against using.
+
+### "Bad Request" exception when working with AWS S3 Frankfurt, Seoul, or other "V4" endpoint
+
+
+S3 Frankfurt and Seoul *only* support
+[the V4 authentication API](http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html).
+ +Requests using the V2 API will be rejected with 400 `Bad Request` + +``` +$ bin/hadoop fs -ls s3a://frankfurt/ +WARN s3a.S3AFileSystem: Client: Amazon S3 error 400: 400 Bad Request; Bad Request (retryable) + +com.amazonaws.services.s3.model.AmazonS3Exception: Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; Request ID: 923C5D9E75E44C06), S3 Extended Request ID: HDwje6k+ANEeDsM6aJ8+D5gUmNAMguOk2BvZ8PH3g9z0gpH+IuwT7N19oQOnIr5CIx7Vqb/uThE= + at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) + at com.amazonaws.services.s3.AmazonS3Client.headBucket(AmazonS3Client.java:1107) + at com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:1070) + at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:307) + at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:284) + at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2793) + at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:101) + at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2830) + at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2812) + at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:389) + at org.apache.hadoop.fs.Path.getFileSystem(Path.java:356) + at org.apache.hadoop.fs.shell.PathData.expandAsGlob(PathData.java:325) + at org.apache.hadoop.fs.shell.Command.expandArgument(Command.java:235) + at org.apache.hadoop.fs.shell.Command.expandArguments(Command.java:218) + at org.apache.hadoop.fs.shell.FsCommand.processRawArguments(FsCommand.java:103) + at org.apache.hadoop.fs.shell.Command.run(Command.java:165) + at org.apache.hadoop.fs.FsShell.run(FsShell.java:315) + at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76) + at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:90) + at org.apache.hadoop.fs.FsShell.main(FsShell.java:373) +ls: doesBucketExist on frankfurt-new: com.amazonaws.services.s3.model.AmazonS3Exception: + Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; +``` + +This happens when trying to work with any S3 service which only supports the +"V4" signing API —but the client is configured to use the default S3 service +endpoint. + +The S3A client needs to be given the endpoint to use via the `fs.s3a.endpoint` +property. + +As an example, the endpoint for S3 Frankfurt is `s3.eu-central-1.amazonaws.com`: + +```xml + + fs.s3a.endpoint + s3.eu-central-1.amazonaws.com + +``` + +## Connectivity Problems + +### Error message "The bucket you are attempting to access must be addressed using the specified endpoint" + +This surfaces when `fs.s3a.endpoint` is configured to use an S3 service endpoint +which is neither the original AWS one, `s3.amazonaws.com` , nor the one where +the bucket is hosted. The error message contains the redirect target returned +by S3, which can be used to determine the correct value for `fs.s3a.endpoint`. + +``` +org.apache.hadoop.fs.s3a.AWSS3IOException: Received permanent redirect response + to bucket.s3-us-west-2.amazonaws.com. 
This likely indicates that the S3 + endpoint configured in fs.s3a.endpoint does not match the AWS region + containing the bucket.: The bucket you are attempting to access must be + addressed using the specified endpoint. Please send all future requests to + this endpoint. (Service: Amazon S3; Status Code: 301; + Error Code: PermanentRedirect; Request ID: 7D39EC1021C61B11) + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:132) + at org.apache.hadoop.fs.s3a.S3AFileSystem.initMultipartUploads(S3AFileSystem.java:287) + at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:203) + at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2895) + at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:102) + at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2932) + at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2914) + at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:390) +``` + +1. Use the [Specific endpoint of the bucket's S3 service](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region) +1. If not using "V4" authentication (see above), the original S3 endpoint +can be used: + +```xml + + fs.s3a.endpoint + s3.amazonaws.com + +``` + +Using the explicit endpoint for the region is recommended for speed and +to use the V4 signing API. + + +### "Timeout waiting for connection from pool" when writing data + +This happens when using the output stream thread pool runs out of capacity. + +``` +[s3a-transfer-shared-pool1-t20] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) - Unable to execute HTTP request: Timeout waiting for connection from poolorg.apache.http.conn.ConnectionPoolTimeoutException: Timeout waiting for connection from pool + at org.apache.http.impl.conn.PoolingClientConnectionManager.leaseConnection(PoolingClientConnectionManager.java:230) + at org.apache.http.impl.conn.PoolingClientConnectionManager$1.getConnection(PoolingClientConnectionManager.java:199) + at sun.reflect.GeneratedMethodAccessor13.invoke(Unknown Source) + at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.lang.reflect.Method.invoke(Method.java:498) + at com.amazonaws.http.conn.ClientConnectionRequestFactory$Handler.invoke(ClientConnectionRequestFactory.java:70) + at com.amazonaws.http.conn.$Proxy10.getConnection(Unknown Source) + at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:424) + at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884) + at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) + at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) + at com.amazonaws.services.s3.AmazonS3Client.doUploadPart(AmazonS3Client.java:2921) + at com.amazonaws.services.s3.AmazonS3Client.uploadPart(AmazonS3Client.java:2906) + at org.apache.hadoop.fs.s3a.S3AFileSystem.uploadPart(S3AFileSystem.java:1025) + at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload$1.call(S3ABlockOutputStream.java:360) + at 
org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload$1.call(S3ABlockOutputStream.java:355)
+ at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
+ at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+ at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
+ at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
+ at java.lang.Thread.run(Thread.java:745)
+```
+
+Make sure that `fs.s3a.connection.maximum` is larger
+than `fs.s3a.threads.max`.
+
+```xml
+<property>
+  <name>fs.s3a.threads.max</name>
+  <value>20</value>
+</property>
+
+<property>
+  <name>fs.s3a.connection.maximum</name>
+  <value>30</value>
+</property>
+```
+
+### "Timeout waiting for connection from pool" when reading data
+
+This happens when more threads are trying to read from an S3A system than
+the maximum number of allocated HTTP connections.
+
+Set `fs.s3a.connection.maximum` to a larger value (and at least as large as
+`fs.s3a.threads.max`).
+
+### Out of heap memory when writing via Fast Upload
+
+This can happen when the upload buffering mechanism
+uses memory (either `fs.s3a.fast.upload.buffer=array` or
+`fs.s3a.fast.upload.buffer=bytebuffer`).
+
+More data is being generated in the JVM than it can upload to S3 —and
+so much data has been buffered that the JVM has run out of memory.
+
+1. Consult [S3A Fast Upload Thread Tuning](./index.html#fast_upload_thread_tuning) for
+detail on this issue and options to address it.
+
+1. Switch to buffering to disk, rather than memory.
+
+
+This surfaces if, while a multipart upload was taking place, all outstanding multipart
+uploads were garbage collected. The upload operation cannot complete because
+the data uploaded has been deleted.
+
+Consult [Cleaning up After Incremental Upload Failures](./index.html#multipart_purge) for
+details on how the multipart purge timeout can be set. If multipart uploads
+are failing with the message above, it may be a sign that this value is too low.
+
+### `MultiObjectDeleteException` during delete or rename of files
+
+```
+Exception in thread "main" com.amazonaws.services.s3.model.MultiObjectDeleteException:
+ Status Code: 0, AWS Service: null, AWS Request ID: null, AWS Error Code: null,
+ AWS Error Message: One or more objects could not be deleted, S3 Extended Request ID: null
+ at com.amazonaws.services.s3.AmazonS3Client.deleteObjects(AmazonS3Client.java:1745)
+```
+This happens when trying to delete multiple objects, and one of the objects
+could not be deleted. It *should not occur* just because the object is missing.
+More specifically: at the time this document was written, we could not create
+such a failure.
+
+It will occur if the caller lacks the permission to delete any of the objects.
+
+Consult the log to see the specifics of which objects could not be deleted.
+Do you have permission to do so?
+
+If this operation is failing for reasons other than the caller lacking
+permissions:
+
+1. Try setting `fs.s3a.multiobjectdelete.enable` to `false` (see the sketch below).
+1. Consult [HADOOP-11572](https://issues.apache.org/jira/browse/HADOOP-11572)
+for up-to-date advice.
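+
+As a sketch of the first option in the list above, the switch can be set in
+`core-site.xml`. The property name is the one quoted in the list; setting it to
+`false` should make the client issue individual delete requests rather than a
+single bulk request:
+
+```xml
+<property>
+  <name>fs.s3a.multiobjectdelete.enable</name>
+  <value>false</value>
+</property>
+```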
+ +### "Failed to Sanitize XML document" + +``` +org.apache.hadoop.fs.s3a.AWSClientIOException: getFileStatus on test/testname/streaming/: + com.amazonaws.AmazonClientException: Failed to sanitize XML document + destined for handler class com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser$ListBucketHandler: + Failed to sanitize XML document destined for handler class com.amazonaws.services.s3.model.transform.XmlResponsesSaxParser$ListBucketHandler + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:105) + at org.apache.hadoop.fs.s3a.S3AFileSystem.getFileStatus(S3AFileSystem.java:1462) + at org.apache.hadoop.fs.s3a.S3AFileSystem.innerListStatus(S3AFileSystem.java:1227) + at org.apache.hadoop.fs.s3a.S3AFileSystem.listStatus(S3AFileSystem.java:1203) + at org.apache.hadoop.fs.s3a.S3AGlobber.listStatus(S3AGlobber.java:69) + at org.apache.hadoop.fs.s3a.S3AGlobber.doGlob(S3AGlobber.java:210) + at org.apache.hadoop.fs.s3a.S3AGlobber.glob(S3AGlobber.java:125) + at org.apache.hadoop.fs.s3a.S3AFileSystem.globStatus(S3AFileSystem.java:1853) + at org.apache.hadoop.fs.s3a.S3AFileSystem.globStatus(S3AFileSystem.java:1841) +``` + +We believe this is caused by the connection to S3 being broken. +See [HADOOP-13811](https://issues.apache.org/jira/browse/HADOOP-13811). + +It may go away if the operation is retried. + +### JSON Parse Error from AWS SDK + +Sometimes a JSON Parse error is reported with the stack trace in the `com.amazonaws`, + +Again, we believe this is caused by the connection to S3 being broken. + +It may go away if the operation is retried. + + + +## Miscellaneous Errors + +### When writing data: "java.io.FileNotFoundException: Completing multi-part upload" + + +``` +java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b: com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist. The upload ID may be invalid, or the upload may have been aborted or completed. 
(Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload; Request ID: 84FF8057174D9369), S3 Extended Request ID: Ij5Yn6Eq/qIERH4Z6Io3YL2t9/qNZ7z9gjPb1FrTtTovZ8k1MXqh+zCYYjqmfJ/fCY6E1+JR9jA= + at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) + at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705) + at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473) + at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382) + at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272) + at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) + at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) +``` + +### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient` + +``` +[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) - Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond +org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond + at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143) + at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57) + at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261) + at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283) + at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259) + at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209) + at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272) + at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66) + at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124) + at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686) + at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488) + at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884) + at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) + at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) + at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731) + at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41) + at 
com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28) + at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:745) +``` + +These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client +will attempt to retry the operation; it may just be a transient event. If there +are many such exceptions in logs, it may be a symptom of connectivity or network +problems. + +## File System Semantics + +These are the issues where S3 does not appear to behave the way a filesystem +"should". + +### Visible S3 Inconsistency + +Amazon S3 is *an eventually consistent object store*. That is: not a filesystem. + +To reduce visible inconsistencies, use the [S3Guard](./s3guard.html) consistency +cache. + + +By default, Amazon S3 offers read-after-create consistency: a newly created file +is immediately visible. +There is a small quirk: a negative GET may be cached, such +that even if an object is immediately created, the fact that there "wasn't" +an object is still remembered. + +That means the following sequence on its own will be consistent +``` +touch(path) -> getFileStatus(path) +``` + +But this sequence *may* be inconsistent. + +``` +getFileStatus(path) -> touch(path) -> getFileStatus(path) +``` + +A common source of visible inconsistencies is that the S3 metadata +database —the part of S3 which serves list requests— is updated asynchronously. +Newly added or deleted files may not be visible in the index, even though direct +operations on the object (`HEAD` and `GET`) succeed. + +That means the `getFileStatus()` and `open()` operations are more likely +to be consistent with the state of the object store, but without S3Guard enabled, +directory list operations such as `listStatus()`, `listFiles()`, `listLocatedStatus()`, +and `listStatusIterator()` may not see newly created files, and still list +old files. + +### `FileNotFoundException` even though the file was just written. + +This can be a sign of consistency problems. It may also surface if there is some +asynchronous file write operation still in progress in the client: the operation +has returned, but the write has not yet completed. While the S3A client code +does block during the `close()` operation, we suspect that asynchronous writes +may be taking place somewhere in the stack —this could explain why parallel tests +fail more often than serialized tests. + +### File not found in a directory listing, even though `getFileStatus()` finds it + +(Similarly: deleted file found in listing, though `getFileStatus()` reports +that it is not there) + +This is a visible sign of updates to the metadata server lagging +behind the state of the underlying filesystem. + +Fix: Use S3Guard + + +### File not visible/saved + +The files in an object store are not visible until the write has been completed. +In-progress writes are simply saved to a local file/cached in RAM and only uploaded. +at the end of a write operation. If a process terminated unexpectedly, or failed +to call the `close()` method on an output stream, the pending data will have +been lost. 
+ +### File `flush()`, `hsync` and `hflush()` calls do not save data to S3 + +Again, this is due to the fact that the data is cached locally until the +`close()` operation. The S3A filesystem cannot be used as a store of data +if it is required that the data is persisted durably after every +`Syncable.hflush()` or `Syncable.hsync()` call. +This includes resilient logging, HBase-style journalling +and the like. The standard strategy here is to save to HDFS and then copy to S3. + +## S3 Server Side Encryption + +### Using SSE-KMS "Invalid arn" + +When performing file operations, the user may run into an issue where the KMS +key arn is invalid. +``` +com.amazonaws.services.s3.model.AmazonS3Exception: +Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException; Request ID: 708284CF60EE233F), +S3 Extended Request ID: iHUUtXUSiNz4kv3Bdk/hf9F+wjPt8GIVvBHx/HEfCBYkn7W6zmpvbA3XT7Y5nTzcZtfuhcqDunw=: +Invalid arn (Service: Amazon S3; Status Code: 400; Error Code: KMS.NotFoundException; Request ID: 708284CF60EE233F) +``` + +This is due to either, the KMS key id is entered incorrectly, or the KMS key id +is in a different region than the S3 bucket being used. + +### Using SSE-C "Bad Request" + +When performing file operations the user may run into an unexpected 400/403 +error such as +``` +org.apache.hadoop.fs.s3a.AWSS3IOException: getFileStatus on fork-4/: com.amazonaws.services.s3.model.AmazonS3Exception: +Bad Request (Service: Amazon S3; Status Code: 400; +Error Code: 400 Bad Request; Request ID: 42F9A1987CB49A99), +S3 Extended Request ID: jU2kcwaXnWj5APB14Cgb1IKkc449gu2+dhIsW/+7x9J4D+VUkKvu78mBo03oh9jnOT2eoTLdECU=: +Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; Request ID: 42F9A1987CB49A99) +``` + +This can happen in the cases of not specifying the correct SSE-C encryption key. +Such cases can be as follows: +1. An object is encrypted using SSE-C on S3 and either the wrong encryption type +is used, no encryption is specified, or the SSE-C specified is incorrect. +2. A directory is encrypted with a SSE-C keyA and the user is trying to move a +file using configured SSE-C keyB into that structure. + +## Performance + +S3 is slower to read data than HDFS, even on virtual clusters running on +Amazon EC2. + +* HDFS replicates data for faster query performance. +* HDFS stores the data on the local hard disks, avoiding network traffic + if the code can be executed on that host. As EC2 hosts often have their + network bandwidth throttled, this can make a tangible difference. +* HDFS is significantly faster for many "metadata" operations: listing +the contents of a directory, calling `getFileStatus()` on path, +creating or deleting directories. (S3Guard reduces but does not eliminate +the speed gap). +* On HDFS, Directory renames and deletes are `O(1)` operations. On +S3 renaming is a very expensive `O(data)` operation which may fail partway through +in which case the final state depends on where the copy+ delete sequence was when it failed. +All the objects are copied, then the original set of objects are deleted, so +a failure should not lose data —it may result in duplicate datasets. +* Unless fast upload enabled, the write only begins on a `close()` operation. +This can take so long that some applications can actually time out. +* File IO involving many seek calls/positioned read calls will encounter +performance problems due to the size of the HTTP requests made. 
Enable the +"random" fadvise policy to alleviate this at the +expense of sequential read performance and bandwidth. + +The slow performance of `rename()` surfaces during the commit phase of work, +including + +* The MapReduce `FileOutputCommitter`. This also used by Apache Spark. +* DistCp's rename-after-copy operation. +* The `hdfs fs -rm` command renaming the file under `.Trash` rather than +deleting it. Use `-skipTrash` to eliminate that step. + +These operations can be significantly slower when S3 is the destination +compared to HDFS or other "real" filesystem. + +*Improving S3 load-balancing behavior* + +Amazon S3 uses a set of front-end servers to provide access to the underlying data. +The choice of which front-end server to use is handled via load-balancing DNS +service: when the IP address of an S3 bucket is looked up, the choice of which +IP address to return to the client is made based on the the current load +of the front-end servers. + +Over time, the load across the front-end changes, so those servers considered +"lightly loaded" will change. If the DNS value is cached for any length of time, +your application may end up talking to an overloaded server. Or, in the case +of failures, trying to talk to a server that is no longer there. + +And by default, for historical security reasons in the era of applets, +the DNS TTL of a JVM is "infinity". + +To work with AWS better, set the DNS time-to-live of an application which +works with S3 to something lower. See [AWS documentation](http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/java-dg-jvm-ttl.html). + +## Troubleshooting network performance An example of this is covered in [HADOOP-13871](https://issues.apache.org/jira/browse/HADOOP-13871). @@ -49,4 +650,74 @@ Consider reducing the connection timeout of the s3a connection. 15000 ``` -This *may* cause the client to react faster to network pauses. +This *may* cause the client to react faster to network pauses, so display +stack traces fast. At the same time, it may be less resilient to +connectivity problems. + + +## Other Issues + +### Enabling low-level logging + +The AWS SDK and the Apache S3 components can be configured to log at +more detail, as can S3A itself. + +```properties +log4j.logger.org.apache.hadoop.fs.s3a=DEBUG +log4j.logger.com.amazonaws.request=DEBUG +log4j.logger.com.amazonaws.thirdparty.apache.http=DEBUG +``` + +If using the "unshaded" JAR, then the Apache HttpClient can be directly configured: + +```properties +log4j.logger.org.apache.http=DEBUG +``` + + +This produces a log such as this, wich is for a V4-authenticated PUT of a 0-byte file used +as an empty directory marker + +``` +execchain.MainClientExec (MainClientExec.java:execute(255)) - Executing request PUT /test/ HTTP/1.1 +execchain.MainClientExec (MainClientExec.java:execute(266)) - Proxy auth state: UNCHALLENGED +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(135)) - http-outgoing-0 >> PUT /test/ HTTP/1.1 +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> Host: ireland-new.s3-eu-west-1.amazonaws.com +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> x-amz-content-sha256: UNSIGNED-PAYLOAD +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> Authorization: AWS4-HMAC-SHA256 Credential=AKIAIYZ5JEEEER/20170904/eu-west-1/s3/aws4_request, ... 
+http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> X-Amz-Date: 20170904T172929Z +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> User-Agent: Hadoop 3.0.0-beta-1, aws-sdk-java/1.11.134 ... +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> amz-sdk-invocation-id: 75b530f8-ad31-1ad3-13db-9bd53666b30d +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> amz-sdk-retry: 0/0/500 +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> Content-Type: application/octet-stream +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> Content-Length: 0 +http.headers (LoggingManagedHttpClientConnection.java:onRequestSubmitted(138)) - http-outgoing-0 >> Connection: Keep-Alive +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "PUT /test/ HTTP/1.1[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "Host: ireland-new.s3-eu-west-1.amazonaws.com[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "x-amz-content-sha256: UNSIGNED-PAYLOAD[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "Authorization: AWS4-HMAC-SHA256 Credential=AKIAIYZ5JEEEER/20170904/eu-west-1/s3/aws4_request, ,,, +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "X-Amz-Date: 20170904T172929Z[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "User-Agent: 3.0.0-beta-1, aws-sdk-java/1.11.134 ... +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "amz-sdk-invocation-id: 75b530f8-ad31-1ad3-13db-9bd53666b30d[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "amz-sdk-retry: 0/0/500[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "Content-Type: application/octet-stream[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "Content-Length: 0[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "Connection: Keep-Alive[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 >> "[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 << "HTTP/1.1 200 OK[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 << "x-amz-id-2: mad9GqKztzlL0cdnCKAj9GJOAs+DUjbSC5jRkO7W1E7Nk2BUmFvt81bhSNPGdZmyyKqQI9i/B/A=[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 << "x-amz-request-id: C953D2FE4ABF5C51[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 << "Date: Mon, 04 Sep 2017 17:29:30 GMT[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 << "ETag: "d41d8cd98f00b204e9800998ecf8427e"[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 << "Content-Length: 0[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 << "Server: AmazonS3[\r][\n]" +http.wire (Wire.java:wire(72)) - http-outgoing-0 << "[\r][\n]" +http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(124)) - http-outgoing-0 << HTTP/1.1 200 OK +http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << x-amz-id-2: mad9GqKztzlL0cdnCKAj9GJOAs+DUjbSC5jRkO7W1E7Nk2BUmFvt81bhSNPGdZmyyKqQI9i/B/A= +http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << x-amz-request-id: C953D2FE4ABF5C51 +http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Date: Mon, 04 Sep 2017 17:29:30 GMT +http.headers 
(LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << ETag: "d41d8cd98f00b204e9800998ecf8427e" +http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Content-Length: 0 +http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Server: AmazonS3 +execchain.MainClientExec (MainClientExec.java:execute(284)) - Connection can be kept alive for 60000 MILLISECONDS +``` diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java index 587dbbc1df1..8da8b6ad5b2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java @@ -47,7 +47,6 @@ protected int getTestTimeoutMillis() { protected Configuration createConfiguration() { Configuration newConf = super.createConfiguration(); newConf.setLong(MULTIPART_SIZE, MULTIPART_SETTING); - newConf.setBoolean(FAST_UPLOAD, true); newConf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK); // patch in S3Guard options maybeEnableS3Guard(newConf); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractCreate.java deleted file mode 100644 index 502cf5ae2dd..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractCreate.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.contract.s3n; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractCreateTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; -import org.apache.hadoop.fs.contract.ContractTestUtils; - -/** - * S3N contract tests creating files. 
- */ -public class ITestS3NContractCreate extends AbstractContractCreateTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeS3Contract(conf); - } - - @Override - public void testOverwriteEmptyDirectory() throws Throwable { - ContractTestUtils.skip( - "blobstores can't distinguish empty directories from files"); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractDelete.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractDelete.java deleted file mode 100644 index 675f979fbe2..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractDelete.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.contract.s3n; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractDeleteTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * S3A contract tests covering deletes. - */ -public class ITestS3NContractDelete extends AbstractContractDeleteTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeS3Contract(conf); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractMkdir.java deleted file mode 100644 index 3c566f347a7..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractMkdir.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.contract.s3n; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Test dir operations on S3. 
- */ -public class ITestS3NContractMkdir extends AbstractContractMkdirTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeS3Contract(conf); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractOpen.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractOpen.java deleted file mode 100644 index 7ebfc4e3cc7..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractOpen.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.contract.s3n; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractOpenTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * S3N contract tests opening files. - */ -public class ITestS3NContractOpen extends AbstractContractOpenTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeS3Contract(conf); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractRename.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractRename.java deleted file mode 100644 index effe9eb2e67..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractRename.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.contract.s3n; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractRenameTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * S3N contract tests covering rename. 
- */ -public class ITestS3NContractRename extends AbstractContractRenameTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeS3Contract(conf); - } - -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractRootDir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractRootDir.java deleted file mode 100644 index 3fdf868b074..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractRootDir.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.contract.s3n; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Root dir operations against an S3 bucket. - */ -public class ITestS3NContractRootDir extends - AbstractContractRootDirectoryTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeS3Contract(conf); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractSeek.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractSeek.java deleted file mode 100644 index 9e1ce730972..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/ITestS3NContractSeek.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.contract.s3n; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractSeekTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * S3N contract tests covering file seek. 
- */ -public class ITestS3NContractSeek extends AbstractContractSeekTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeS3Contract(conf); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/NativeS3Contract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/NativeS3Contract.java deleted file mode 100644 index 5796d882752..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/NativeS3Contract.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.contract.s3n; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.AbstractBondedFSContract; - -/** - * The contract of S3N: only enabled if the test bucket is provided. - */ -public class NativeS3Contract extends AbstractBondedFSContract { - - public static final String CONTRACT_XML = "contract/s3n.xml"; - - - public NativeS3Contract(Configuration conf) { - super(conf); - //insert the base features - addConfResource(CONTRACT_XML); - } - - @Override - public String getScheme() { - return "s3n"; - } - - @Override - public Path getTestPath() { - String testUniqueForkId = System.getProperty("test.unique.fork.id"); - return testUniqueForkId == null ? super.getTestPath() : - new Path("/" + testUniqueForkId, "test"); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputArray.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputArray.java index 87f676c3c84..afd3ec2bd34 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputArray.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputArray.java @@ -56,7 +56,6 @@ protected Configuration createConfiguration() { S3ATestUtils.disableFilesystemCaching(conf); conf.setLong(MIN_MULTIPART_THRESHOLD, MULTIPART_MIN_SIZE); conf.setInt(MULTIPART_SIZE, MULTIPART_MIN_SIZE); - conf.setBoolean(Constants.FAST_UPLOAD, true); conf.set(FAST_UPLOAD_BUFFER, getBlockOutputBufferName()); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContractGetFileStatusV1List.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContractGetFileStatusV1List.java new file mode 100644 index 00000000000..527533682f8 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContractGetFileStatusV1List.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.s3a.S3AContract; + +import static org.apache.hadoop.fs.s3a.Constants.LIST_VERSION; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard; + +/** + * S3A contract tests for getFileStatus, using the v1 List Objects API. + */ +public class ITestS3AContractGetFileStatusV1List + extends AbstractContractGetFileStatusTest { + + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); + } + + @Override + public void teardown() throws Exception { + getLog().info("FS details {}", getFileSystem()); + super.teardown(); + } + + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + disableFilesystemCaching(conf); + conf.setInt(Constants.MAX_PAGING_KEYS, 2); + maybeEnableS3Guard(conf); + + // Use v1 List Objects API + conf.setInt(LIST_VERSION, 1); + return conf; + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSECBlockOutputStream.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSECBlockOutputStream.java index afa04412935..8991badd836 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSECBlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSECBlockOutputStream.java @@ -31,7 +31,6 @@ public class ITestS3AEncryptionSSECBlockOutputStream @Override protected Configuration createConfiguration() { Configuration conf = super.createConfiguration(); - conf.setBoolean(Constants.FAST_UPLOAD, true); conf.set(Constants.FAST_UPLOAD_BUFFER, Constants.FAST_UPLOAD_BYTEBUFFER); conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKeyBlockOutputStream.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKeyBlockOutputStream.java index 8ce3a137914..4c953bd2897 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKeyBlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKeyBlockOutputStream.java @@ -39,9 +39,7 @@ protected Configuration createConfiguration() { skip(Constants.SERVER_SIDE_ENCRYPTION_KEY+ " is not set for " + S3AEncryptionMethods.SSE_KMS.getMethod()); } - 
conf.setBoolean(Constants.FAST_UPLOAD, true); - conf.set(Constants.FAST_UPLOAD_BUFFER, - Constants.FAST_UPLOAD_BYTEBUFFER); + conf.set(Constants.FAST_UPLOAD_BUFFER, Constants.FAST_UPLOAD_BYTEBUFFER); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSES3BlockOutputStream.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSES3BlockOutputStream.java index 407601f1a03..ff9c07a7d5a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSES3BlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSES3BlockOutputStream.java @@ -29,7 +29,6 @@ public class ITestS3AEncryptionSSES3BlockOutputStream @Override protected Configuration createConfiguration() { Configuration conf = super.createConfiguration(); - conf.setBoolean(Constants.FAST_UPLOAD, true); conf.set(Constants.FAST_UPLOAD_BUFFER, Constants.FAST_UPLOAD_BYTEBUFFER); //must specify encryption key as empty because SSE-S3 does not allow it, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index 6cff5339244..da7699edaa0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.ObjectListing; +import com.amazonaws.services.s3.model.ListObjectsV2Result; import com.amazonaws.services.s3.AmazonS3; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -488,6 +488,10 @@ public void testCommitByRenameOperations() throws Throwable { @Test public void testInconsistentS3ClientDeletes() throws Throwable { + // Test only implemented for v2 S3 list API + Assume.assumeTrue(getConfiguration() + .getInt(LIST_VERSION, DEFAULT_LIST_VERSION) == 2); + S3AFileSystem fs = getFileSystem(); Path root = path("testInconsistentClient" + DEFAULT_DELAY_KEY_SUBSTRING); for (int i = 0; i < 3; i++) { @@ -502,17 +506,17 @@ public void testInconsistentS3ClientDeletes() throws Throwable { AmazonS3 client = fs.getAmazonS3Client(); String key = fs.pathToKey(root) + "/"; - ObjectListing preDeleteDelimited = client.listObjects( - fs.createListObjectsRequest(key, "/")); - ObjectListing preDeleteUndelimited = client.listObjects( - fs.createListObjectsRequest(key, null)); + ListObjectsV2Result preDeleteDelimited = client.listObjectsV2( + fs.createListObjectsRequest(key, "/").getV2()); + ListObjectsV2Result preDeleteUndelimited = client.listObjectsV2( + fs.createListObjectsRequest(key, null).getV2()); fs.delete(root, true); - ObjectListing postDeleteDelimited = client.listObjects( - fs.createListObjectsRequest(key, "/")); - ObjectListing postDeleteUndelimited = client.listObjects( - fs.createListObjectsRequest(key, null)); + ListObjectsV2Result postDeleteDelimited = client.listObjectsV2( + fs.createListObjectsRequest(key, "/").getV2()); + ListObjectsV2Result postDeleteUndelimited = client.listObjectsV2( + fs.createListObjectsRequest(key, null).getV2()); assertEquals("InconsistentAmazonS3Client added back objects incorrectly " + "in a non-recursive listing", diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java index 58e4d3074bc..586264d1e76 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java @@ -25,9 +25,12 @@ import java.io.FileNotFoundException; import java.util.Collections; import java.util.Date; +import java.util.List; import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.ListObjectsRequest; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3ObjectSummary; @@ -93,12 +96,7 @@ public void testImplicitDirectory() throws Exception { when(s3.getObjectMetadata(argThat( correctGetMetadataRequest(BUCKET, key + "/")) )).thenThrow(NOT_FOUND); - ObjectListing objects = mock(ObjectListing.class); - when(objects.getCommonPrefixes()).thenReturn( - Collections.singletonList("dir/")); - when(objects.getObjectSummaries()).thenReturn( - Collections.emptyList()); - when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(objects); + setupListMocks(Collections.singletonList("dir/"), Collections.emptyList()); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -118,12 +116,7 @@ public void testRoot() throws Exception { when(s3.getObjectMetadata(argThat( correctGetMetadataRequest(BUCKET, key + "/") ))).thenThrow(NOT_FOUND); - ObjectListing objects = mock(ObjectListing.class); - when(objects.getCommonPrefixes()).thenReturn( - Collections.emptyList()); - when(objects.getObjectSummaries()).thenReturn( - Collections.emptyList()); - when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(objects); + setupListMocks(Collections.emptyList(), Collections.emptyList()); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -140,16 +133,28 @@ public void testNotFound() throws Exception { when(s3.getObjectMetadata(argThat( correctGetMetadataRequest(BUCKET, key + "/") ))).thenThrow(NOT_FOUND); - ObjectListing objects = mock(ObjectListing.class); - when(objects.getCommonPrefixes()).thenReturn( - Collections.emptyList()); - when(objects.getObjectSummaries()).thenReturn( - Collections.emptyList()); - when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(objects); + setupListMocks(Collections.emptyList(), Collections.emptyList()); exception.expect(FileNotFoundException.class); fs.getFileStatus(path); } + private void setupListMocks(List prefixes, + List summaries) { + + // V1 list API mock + ObjectListing objects = mock(ObjectListing.class); + when(objects.getCommonPrefixes()).thenReturn(prefixes); + when(objects.getObjectSummaries()).thenReturn(summaries); + when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(objects); + + // V2 list API mock + ListObjectsV2Result v2Result = mock(ListObjectsV2Result.class); + when(v2Result.getCommonPrefixes()).thenReturn(prefixes); + when(v2Result.getObjectSummaries()).thenReturn(summaries); + when(s3.listObjectsV2(any(ListObjectsV2Request.class))) + .thenReturn(v2Result); + } + private Matcher correctGetMetadataRequest( String bucket, String key) { return new BaseMatcher() { diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java index 8b163cbee63..230dbad5511 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java @@ -98,7 +98,6 @@ protected Configuration createScaleConfiguration() { conf.setLong(MIN_MULTIPART_THRESHOLD, partitionSize); conf.setInt(MULTIPART_SIZE, partitionSize); conf.set(USER_AGENT_PREFIX, "STestS3AHugeFileCreate"); - conf.setBoolean(FAST_UPLOAD, true); conf.set(FAST_UPLOAD_BUFFER, getBlockOutputBufferName()); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestJets3tNativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestJets3tNativeFileSystemStore.java deleted file mode 100644 index cfe622c5785..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestJets3tNativeFileSystemStore.java +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; - -import static org.junit.Assert.*; -import static org.junit.Assume.*; - -import org.junit.Before; -import org.junit.After; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URI; -import java.security.DigestInputStream; -import java.security.DigestOutputStream; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; - -/** - * S3N tests through live S3 service. 
- */ -public class ITestJets3tNativeFileSystemStore { - private Configuration conf; - private Jets3tNativeFileSystemStore store; - private NativeS3FileSystem fs; - - @Before - public void setUp() throws Exception { - conf = new Configuration(); - store = new Jets3tNativeFileSystemStore(); - fs = new NativeS3FileSystem(store); - conf.setBoolean("fs.s3n.multipart.uploads.enabled", true); - conf.setLong("fs.s3n.multipart.uploads.block.size", 64 * 1024 * 1024); - fs.initialize(URI.create(conf.get("test.fs.s3n.name")), conf); - } - - @After - public void tearDown() throws Exception { - try { - store.purge("test"); - } catch (Exception e) {} - } - - @BeforeClass - public static void checkSettings() throws Exception { - Configuration conf = new Configuration(); - assumeNotNull(conf.get("fs.s3n.awsAccessKeyId")); - assumeNotNull(conf.get("fs.s3n.awsSecretAccessKey")); - assumeNotNull(conf.get("test.fs.s3n.name")); - } - - protected void writeRenameReadCompare(Path path, long len) - throws IOException, NoSuchAlgorithmException { - // If len > fs.s3n.multipart.uploads.block.size, - // we'll use a multipart upload copy - MessageDigest digest = MessageDigest.getInstance("MD5"); - OutputStream out = new BufferedOutputStream( - new DigestOutputStream(fs.create(path, false), digest)); - for (long i = 0; i < len; i++) { - out.write('Q'); - } - out.flush(); - out.close(); - - assertTrue("Exists", fs.exists(path)); - - // Depending on if this file is over 5 GB or not, - // rename will cause a multipart upload copy - Path copyPath = path.suffix(".copy"); - fs.rename(path, copyPath); - - assertTrue("Copy exists", fs.exists(copyPath)); - - // Download file from S3 and compare the digest against the original - MessageDigest digest2 = MessageDigest.getInstance("MD5"); - InputStream in = new BufferedInputStream( - new DigestInputStream(fs.open(copyPath), digest2)); - long copyLen = 0; - while (in.read() != -1) { - copyLen++; - } - in.close(); - - assertEquals("Copy length matches original", len, copyLen); - assertArrayEquals("Digests match", digest.digest(), digest2.digest()); - } - - @Test - public void testSmallUpload() throws IOException, NoSuchAlgorithmException { - // Regular upload, regular copy - writeRenameReadCompare(new Path("/test/small"), 16384); - } - - @Test - public void testMediumUpload() throws IOException, NoSuchAlgorithmException { - // Multipart upload, regular copy - writeRenameReadCompare(new Path("/test/medium"), 33554432); // 100 MB - } - - /* - Enable Multipart upload to run this test - @Test - public void testExtraLargeUpload() - throws IOException, NoSuchAlgorithmException { - // Multipart upload, multipart copy - writeRenameReadCompare(new Path("/test/xlarge"), 5368709121L); // 5GB+1byte - } - */ -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/InMemoryNativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/InMemoryNativeFileSystemStore.java deleted file mode 100644 index c082493c9c6..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/InMemoryNativeFileSystemStore.java +++ /dev/null @@ -1,213 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import static org.apache.hadoop.fs.s3native.NativeS3FileSystem.PATH_DELIMITER; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_BUFFER_DIR_KEY; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.addDeprecatedConfigKeys; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.Map.Entry; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.Time; - -/** - *

- * A stub implementation of {@link NativeFileSystemStore} for testing - * {@link NativeS3FileSystem} without actually connecting to S3. - *

- */ -public class InMemoryNativeFileSystemStore implements NativeFileSystemStore { - - static { - // Add the deprecated config keys - addDeprecatedConfigKeys(); - } - - private Configuration conf; - - private SortedMap metadataMap = - new TreeMap(); - private SortedMap dataMap = new TreeMap(); - - @Override - public void initialize(URI uri, Configuration conf) throws IOException { - this.conf = conf; - } - - @Override - public void storeEmptyFile(String key) throws IOException { - metadataMap.put(key, new FileMetadata(key, 0, Time.now())); - dataMap.put(key, new byte[0]); - } - - @Override - public void storeFile(String key, File file, byte[] md5Hash) - throws IOException { - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - byte[] buf = new byte[8192]; - int numRead; - BufferedInputStream in = null; - try { - in = new BufferedInputStream(new FileInputStream(file)); - while ((numRead = in.read(buf)) >= 0) { - out.write(buf, 0, numRead); - } - } finally { - if (in != null) { - in.close(); - } - } - metadataMap.put(key, - new FileMetadata(key, file.length(), Time.now())); - dataMap.put(key, out.toByteArray()); - } - - @Override - public InputStream retrieve(String key) throws IOException { - return retrieve(key, 0); - } - - @Override - public InputStream retrieve(String key, long byteRangeStart) - throws IOException { - - byte[] data = dataMap.get(key); - File file = createTempFile(); - BufferedOutputStream out = null; - try { - out = new BufferedOutputStream(new FileOutputStream(file)); - out.write(data, (int) byteRangeStart, - data.length - (int) byteRangeStart); - } finally { - if (out != null) { - out.close(); - } - } - return new FileInputStream(file); - } - - private File createTempFile() throws IOException { - File dir = new File(conf.get(S3_NATIVE_BUFFER_DIR_KEY)); - if (!dir.exists() && !dir.mkdirs()) { - throw new IOException("Cannot create S3 buffer directory: " + dir); - } - File result = File.createTempFile("test-", ".tmp", dir); - result.deleteOnExit(); - return result; - } - - @Override - public FileMetadata retrieveMetadata(String key) throws IOException { - return metadataMap.get(key); - } - - @Override - public PartialListing list(String prefix, int maxListingLength) - throws IOException { - return list(prefix, maxListingLength, null, false); - } - - @Override - public PartialListing list(String prefix, int maxListingLength, - String priorLastKey, boolean recursive) throws IOException { - - return list(prefix, recursive ? 
null : PATH_DELIMITER, maxListingLength, priorLastKey); - } - - private PartialListing list(String prefix, String delimiter, - int maxListingLength, String priorLastKey) throws IOException { - - if (prefix.length() > 0 && !prefix.endsWith(PATH_DELIMITER)) { - prefix += PATH_DELIMITER; - } - - List metadata = new ArrayList(); - SortedSet commonPrefixes = new TreeSet(); - for (String key : dataMap.keySet()) { - if (key.startsWith(prefix)) { - if (delimiter == null) { - metadata.add(retrieveMetadata(key)); - } else { - int delimIndex = key.indexOf(delimiter, prefix.length()); - if (delimIndex == -1) { - metadata.add(retrieveMetadata(key)); - } else { - String commonPrefix = key.substring(0, delimIndex); - commonPrefixes.add(commonPrefix); - } - } - } - if (metadata.size() + commonPrefixes.size() == maxListingLength) { - new PartialListing(key, metadata.toArray(new FileMetadata[0]), - commonPrefixes.toArray(new String[0])); - } - } - return new PartialListing(null, metadata.toArray(new FileMetadata[0]), - commonPrefixes.toArray(new String[0])); - } - - @Override - public void delete(String key) throws IOException { - metadataMap.remove(key); - dataMap.remove(key); - } - - @Override - public void copy(String srcKey, String dstKey) throws IOException { - metadataMap.put(dstKey, metadataMap.get(srcKey)); - dataMap.put(dstKey, dataMap.get(srcKey)); - } - - @Override - public void purge(String prefix) throws IOException { - Iterator> i = - metadataMap.entrySet().iterator(); - while (i.hasNext()) { - Entry entry = i.next(); - if (entry.getKey().startsWith(prefix)) { - dataMap.remove(entry.getKey()); - i.remove(); - } - } - } - - @Override - public void dump() throws IOException { - System.out.println(metadataMap.values()); - System.out.println(dataMap.keySet()); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java deleted file mode 100644 index bfbca714214..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java +++ /dev/null @@ -1,266 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.s3native; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3native.NativeS3FileSystem.NativeS3FsInputStream; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.internal.AssumptionViolatedException; -import static org.junit.Assert.*; - -public abstract class NativeS3FileSystemContractBaseTest - extends FileSystemContractBaseTest { - public static final String KEY_TEST_FS = "test.fs.s3n.name"; - private NativeFileSystemStore store; - - abstract NativeFileSystemStore getNativeFileSystemStore() throws IOException; - - @Before - public void setUp() throws Exception { - Configuration conf = new Configuration(); - String fsname = conf.get(KEY_TEST_FS); - if (StringUtils.isEmpty(fsname)) { - throw new AssumptionViolatedException( - "No test FS defined in :" + KEY_TEST_FS); - } - store = getNativeFileSystemStore(); - fs = new NativeS3FileSystem(store); - fs.initialize(URI.create(fsname), conf); - } - - @After - public void tearDown() throws Exception { - if (store != null) { - store.purge("test"); - } - } - - @Test - public void testCanonicalName() throws Exception { - assertNull("s3n doesn't support security token and shouldn't have canonical name", - fs.getCanonicalServiceName()); - } - - @Test - public void testListStatusForRoot() throws Exception { - FileStatus[] paths = fs.listStatus(path("/")); - assertEquals("Root directory is not empty; ", 0, paths.length); - - Path testDir = path("/test"); - assertTrue(fs.mkdirs(testDir)); - - paths = fs.listStatus(path("/")); - assertEquals(1, paths.length); - assertEquals(path("/test"), paths[0].getPath()); - } - - @Test - public void testNoTrailingBackslashOnBucket() throws Exception { - assertTrue(fs.getFileStatus(new Path(fs.getUri().toString())).isDirectory()); - } - - private void createTestFiles(String base) throws IOException { - store.storeEmptyFile(base + "/file1"); - store.storeEmptyFile(base + "/dir/file2"); - store.storeEmptyFile(base + "/dir/file3"); - } - - @Test - public void testDirWithDifferentMarkersWorks() throws Exception { - - for (int i = 0; i <= 3; i++) { - String base = "test/hadoop" + i; - Path path = path("/" + base); - - createTestFiles(base); - - if (i == 0 ) { - //do nothing, we are testing correctness with no markers - } - else if (i == 1) { - // test for _$folder$ marker - store.storeEmptyFile(base + "_$folder$"); - store.storeEmptyFile(base + "/dir_$folder$"); - } - else if (i == 2) { - // test the end slash file marker - store.storeEmptyFile(base + "/"); - store.storeEmptyFile(base + "/dir/"); - } - else if (i == 3) { - // test both markers - store.storeEmptyFile(base + "_$folder$"); - store.storeEmptyFile(base + "/dir_$folder$"); - store.storeEmptyFile(base + "/"); - store.storeEmptyFile(base + "/dir/"); - } - - assertTrue(fs.getFileStatus(path).isDirectory()); - assertEquals(2, fs.listStatus(path).length); - } - } - - @Test - public void testDeleteWithNoMarker() throws Exception { - String base = "test/hadoop"; - Path path = path("/" + base); - - createTestFiles(base); - - fs.delete(path, true); - - path = path("/test"); - assertTrue(fs.getFileStatus(path).isDirectory()); - assertEquals(0, fs.listStatus(path).length); - } - - @Test - public void 
testRenameWithNoMarker() throws Exception { - String base = "test/hadoop"; - Path dest = path("/test/hadoop2"); - - createTestFiles(base); - - fs.rename(path("/" + base), dest); - - Path path = path("/test"); - assertTrue(fs.getFileStatus(path).isDirectory()); - assertEquals(1, fs.listStatus(path).length); - assertTrue(fs.getFileStatus(dest).isDirectory()); - assertEquals(2, fs.listStatus(dest).length); - } - - @Test - public void testEmptyFile() throws Exception { - store.storeEmptyFile("test/hadoop/file1"); - fs.open(path("/test/hadoop/file1")).close(); - } - - @Test - public void testBlockSize() throws Exception { - Path file = path("/test/hadoop/file"); - createFile(file); - assertEquals("Default block size", fs.getDefaultBlockSize(file), - fs.getFileStatus(file).getBlockSize()); - - // Block size is determined at read time - long newBlockSize = fs.getDefaultBlockSize(file) * 2; - fs.getConf().setLong("fs.s3n.block.size", newBlockSize); - assertEquals("Double default block size", newBlockSize, - fs.getFileStatus(file).getBlockSize()); - } - - @Test - public void testRetryOnIoException() throws Exception { - class TestInputStream extends InputStream { - boolean shouldThrow = true; - int throwCount = 0; - int pos = 0; - byte[] bytes; - boolean threwException = false; - - public TestInputStream() { - bytes = new byte[256]; - for (int i = pos; i < 256; i++) { - bytes[i] = (byte)i; - } - } - - @Override - public int read() throws IOException { - shouldThrow = !shouldThrow; - if (shouldThrow) { - throwCount++; - threwException = true; - throw new IOException(); - } - assertFalse("IOException was thrown. InputStream should be reopened", threwException); - return pos++; - } - - @Override - public int read(byte[] b, int off, int len) throws IOException { - shouldThrow = !shouldThrow; - if (shouldThrow) { - throwCount++; - threwException = true; - throw new IOException(); - } - assertFalse("IOException was thrown. InputStream should be reopened", threwException); - int sizeToRead = Math.min(len, 256 - pos); - for (int i = 0; i < sizeToRead; i++) { - b[i] = bytes[pos + i]; - } - pos += sizeToRead; - return sizeToRead; - } - - public void reopenAt(long byteRangeStart) { - threwException = false; - pos = Long.valueOf(byteRangeStart).intValue(); - } - - } - - final TestInputStream is = new TestInputStream(); - - class MockNativeFileSystemStore extends Jets3tNativeFileSystemStore { - @Override - public InputStream retrieve(String key, long byteRangeStart) throws IOException { - is.reopenAt(byteRangeStart); - return is; - } - } - - NativeS3FsInputStream stream = new NativeS3FsInputStream(new MockNativeFileSystemStore(), null, is, ""); - - // Test reading methods. - byte[] result = new byte[256]; - for (int i = 0; i < 128; i++) { - result[i] = (byte)stream.read(); - } - for (int i = 128; i < 256; i += 8) { - byte[] temp = new byte[8]; - int read = stream.read(temp, 0, 8); - assertEquals(8, read); - System.arraycopy(temp, 0, result, i, 8); - } - - // Assert correct - for (int i = 0; i < 256; i++) { - assertEquals((byte)i, result[i]); - } - - // Test to make sure the throw path was exercised. 
- // every read should have thrown 1 IOException except for the first read - // 144 = 128 - 1 + (128 / 8) - assertEquals(143, ((TestInputStream)is).throwCount); - } - -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/S3NInMemoryFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/S3NInMemoryFileSystem.java deleted file mode 100644 index c0ea85bfc7a..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/S3NInMemoryFileSystem.java +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import org.apache.hadoop.fs.s3native.NativeS3FileSystem; -import org.apache.hadoop.fs.s3native.InMemoryNativeFileSystemStore; - -/** - * A helper implementation of {@link NativeS3FileSystem} - * without actually connecting to S3 for unit testing. - */ -public class S3NInMemoryFileSystem extends NativeS3FileSystem { - public S3NInMemoryFileSystem() { - super(new InMemoryNativeFileSystemStore()); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3Credentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3Credentials.java deleted file mode 100644 index 17b78c7a2b7..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3Credentials.java +++ /dev/null @@ -1,129 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.s3native; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.ProviderUtils; -import org.apache.hadoop.security.alias.CredentialProvider; -import org.apache.hadoop.security.alias.CredentialProviderFactory; - -import java.io.File; -import java.net.URI; - -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.rules.TestName; - -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_AWS_ACCESS_KEY_ID; -import static org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys.S3_NATIVE_AWS_SECRET_ACCESS_KEY; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - -/** - * This is to test the {@link S3Credentials} class for extracting AWS - * credentials. - */ -public class TestS3Credentials { - public static final Log LOG = LogFactory.getLog(TestS3Credentials.class); - - @Rule - public final TestName test = new TestName(); - - @Before - public void announce() { - LOG.info("Running test " + test.getMethodName()); - } - - private static final String EXAMPLE_ID = "AKASOMEACCESSKEY"; - private static final String EXAMPLE_KEY = - "RGV0cm9pdCBSZ/WQgY2xl/YW5lZCB1cAEXAMPLE"; - - @Test - public void testInvalidHostnameWithUnderscores() throws Exception { - S3Credentials s3Credentials = new S3Credentials(); - try { - s3Credentials.initialize(new URI("s3n://a:b@c_d"), new Configuration()); - fail("Should throw IllegalArgumentException"); - } catch (IllegalArgumentException e) { - assertEquals("Invalid hostname in URI s3n://a:b@c_d", e.getMessage()); - } - } - - @Test - public void testPlaintextConfigPassword() throws Exception { - S3Credentials s3Credentials = new S3Credentials(); - Configuration conf = new Configuration(); - conf.set(S3_NATIVE_AWS_ACCESS_KEY_ID, EXAMPLE_ID); - conf.set(S3_NATIVE_AWS_SECRET_ACCESS_KEY, EXAMPLE_KEY); - s3Credentials.initialize(new URI("s3n://foobar"), conf); - assertEquals("Could not retrieve proper access key", EXAMPLE_ID, - s3Credentials.getAccessKey()); - assertEquals("Could not retrieve proper secret", EXAMPLE_KEY, - s3Credentials.getSecretAccessKey()); - } - - @Test - public void testPlaintextConfigPasswordWithWhitespace() throws Exception { - S3Credentials s3Credentials = new S3Credentials(); - Configuration conf = new Configuration(); - conf.set(S3_NATIVE_AWS_ACCESS_KEY_ID, "\r\n " + EXAMPLE_ID + - " \r\n"); - conf.set(S3_NATIVE_AWS_SECRET_ACCESS_KEY, "\r\n " + EXAMPLE_KEY + - " \r\n"); - s3Credentials.initialize(new URI("s3n://foobar"), conf); - assertEquals("Could not retrieve proper access key", EXAMPLE_ID, - s3Credentials.getAccessKey()); - assertEquals("Could not retrieve proper secret", EXAMPLE_KEY, - s3Credentials.getSecretAccessKey()); - } - - @Rule - public final TemporaryFolder tempDir = new TemporaryFolder(); - - @Test - public void testCredentialProvider() throws Exception { - // set up conf to have a cred provider - final Configuration conf = new Configuration(); - final File file = tempDir.newFile("test.jks"); - final URI jks = ProviderUtils.nestURIForLocalJavaKeyStoreProvider( - file.toURI()); - conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, - jks.toString()); - - // add our creds to the provider - final CredentialProvider provider = - CredentialProviderFactory.getProviders(conf).get(0); - provider.createCredentialEntry(S3_NATIVE_AWS_SECRET_ACCESS_KEY, - 
EXAMPLE_KEY.toCharArray()); - provider.flush(); - - // make sure S3Creds can retrieve things. - S3Credentials s3Credentials = new S3Credentials(); - conf.set(S3_NATIVE_AWS_ACCESS_KEY_ID, EXAMPLE_ID); - s3Credentials.initialize(new URI("s3n://foobar"), conf); - assertEquals("Could not retrieve proper access key", EXAMPLE_ID, - s3Credentials.getAccessKey()); - assertEquals("Could not retrieve proper secret", EXAMPLE_KEY, - s3Credentials.getSecretAccessKey()); - } - -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3NInMemoryFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3NInMemoryFileSystem.java deleted file mode 100644 index b457df21b09..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3NInMemoryFileSystem.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URI; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; - -public class TestS3NInMemoryFileSystem extends TestCase { - - private static final String TEST_PATH = "s3n://test/data.txt"; - - private static final String TEST_DATA = "Sample data for testing."; - - private S3NInMemoryFileSystem fs; - - @Override - public void setUp() throws IOException { - fs = new S3NInMemoryFileSystem(); - fs.initialize(URI.create("s3n://test/"), new Configuration()); - } - - public void testBasicReadWriteIO() throws IOException { - FSDataOutputStream writeData = fs.create(new Path(TEST_PATH)); - writeData.write(TEST_DATA.getBytes()); - writeData.flush(); - writeData.close(); - - FSDataInputStream readData = fs.open(new Path(TEST_PATH)); - BufferedReader br = new BufferedReader(new InputStreamReader(readData)); - String line = ""; - StringBuffer stringBuffer = new StringBuffer(); - while ((line = br.readLine()) != null) { - stringBuffer.append(line); - } - br.close(); - - assert(TEST_DATA.equals(stringBuffer.toString())); - } - - @Override - public void tearDown() throws IOException { - fs.close(); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/resources/contract/s3n.xml b/hadoop-tools/hadoop-aws/src/test/resources/contract/s3n.xml deleted file mode 100644 index 0c6b8c69b7f..00000000000 --- a/hadoop-tools/hadoop-aws/src/test/resources/contract/s3n.xml +++ /dev/null @@ -1,110 +0,0 @@ - - - - - - - fs.contract.test.root-tests-enabled - true - - - - fs.contract.test.random-seek-count - 10 - - - - fs.contract.is-blobstore - true - - 
- - fs.contract.create-overwrites-directory - true - - - - fs.contract.create-visibility-delayed - true - - - - fs.contract.is-case-sensitive - true - - - - fs.contract.rename-returns-false-if-source-missing - true - - - - fs.contract.supports-append - false - - - - fs.contract.supports-atomic-directory-delete - false - - - - fs.contract.supports-atomic-rename - false - - - - fs.contract.supports-block-locality - false - - - - fs.contract.supports-concat - false - - - - fs.contract.supports-seek - true - - - - fs.contract.supports-seek-on-closed-file - true - - - - fs.contract.rejects-seek-past-eof - true - - - - fs.contract.supports-strict-exceptions - true - - - - fs.contract.supports-unix-permissions - false - - - diff --git a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties index 9376ebd6027..acbe7f1e2b4 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties +++ b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties @@ -32,3 +32,6 @@ log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR # Log all HTTP requests made; includes S3 interaction. This may # include sensitive information such as account IDs in HTTP headers. #log4j.logger.com.amazonaws.request=DEBUG + +# Turn on low level HTTP protocol debugging +#log4j.logger.com.amazonaws.thirdparty.apache.http=DEBUG diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml index b06c7746d7c..e32f9fcb3cb 100644 --- a/hadoop-tools/hadoop-azure-datalake/pom.xml +++ b/hadoop-tools/hadoop-azure-datalake/pom.xml @@ -110,7 +110,7 @@ com.microsoft.azure azure-data-lake-store-sdk - 2.2.2 + 2.2.3 diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java index f77d98100cd..790902c6229 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java @@ -56,7 +56,6 @@ public final class AdlConfKeys { // MSI Auth Configuration public static final String MSI_PORT = "fs.adl.oauth2.msi.port"; - public static final String MSI_TENANT_GUID = "fs.adl.oauth2.msi.tenantguid"; // DeviceCode Auth configuration public static final String DEVICE_CODE_CLIENT_APP_ID = diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java index a5e31e153c9..a4965959dee 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java @@ -290,9 +290,7 @@ private AccessTokenProvider getConfRefreshTokenBasedTokenProvider( private AccessTokenProvider getMsiBasedTokenProvider( Configuration conf) throws IOException { - int port = Integer.parseInt(getNonEmptyVal(conf, MSI_PORT)); - String tenantGuid = getPasswordString(conf, MSI_TENANT_GUID); - return new MsiTokenProvider(port, tenantGuid); + return new MsiTokenProvider(conf.getInt(MSI_PORT, -1)); } private AccessTokenProvider getDeviceCodeTokenProvider( diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md index e34da36d566..ca79321efdd 100644 --- 
a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md @@ -164,15 +164,11 @@ Identity extension within the VM. The advantage of doing this is that the credentials are managed by the extension, and do not have to be put into core-site.xml. -To use MSI, the following two steps are needed: -1. Modify the VM deployment template to specify the port number of the token - service exposed to localhost by the identity extension in the VM. -2. Get your Azure ActiveDirectory Tenant ID: - 1. Go to [the portal](https://portal.azure.com) - 2. Under services in left nav, look for Azure Active Directory and click on it. - 3. Click on Properties - 4. Note down the GUID shown under "Directory ID" - this is your AAD tenant ID - +To use MSI, modify the VM deployment template to use the identity extension. Note the +port number you specified in the template: this is the port number for the REST endpoint +of the token service exposed to localhost by the identity extension in the VM. The default +recommended port number is 50342 - if the recommended port number is used, then the msi.port +setting below can be omitted in the configuration. ##### Configure core-site.xml Add the following properties to your `core-site.xml` @@ -185,12 +181,7 @@ Add the following properties to your `core-site.xml` fs.adl.oauth2.msi.port - PORT NUMBER FROM STEP 1 ABOVE - - - - fs.adl.oauth2.msi.TenantGuid - AAD TENANT ID GUID FROM STEP 2 ABOVE + PORT NUMBER FROM ABOVE (if different from the default of 50342) ``` diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java index 929b33a0427..12c2e3ffc78 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java @@ -43,8 +43,6 @@ import static org.apache.hadoop.fs.adl.AdlConfKeys .AZURE_AD_TOKEN_PROVIDER_TYPE_KEY; import static org.apache.hadoop.fs.adl.AdlConfKeys.DEVICE_CODE_CLIENT_APP_ID; -import static org.apache.hadoop.fs.adl.AdlConfKeys.MSI_PORT; -import static org.apache.hadoop.fs.adl.AdlConfKeys.MSI_TENANT_GUID; import static org.apache.hadoop.fs.adl.TokenProviderType.*; import static org.junit.Assert.assertEquals; @@ -107,8 +105,6 @@ public void testMSITokenProvider() throws IOException, URISyntaxException { Configuration conf = new Configuration(); conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, MSI); - conf.set(MSI_PORT, "54321"); - conf.set(MSI_TENANT_GUID, "TENANT_GUID"); URI uri = new URI("adl://localhost:8080"); AdlFileSystem fileSystem = new AdlFileSystem(); diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml index 0c5ac631d63..b479872f07b 100644 --- a/hadoop-tools/hadoop-azure/pom.xml +++ b/hadoop-tools/hadoop-azure/pom.xml @@ -34,6 +34,15 @@ UTF-8 true + ${project.build.directory}/test + + unset + + unset + + unset + + 7200 @@ -224,4 +233,246 @@ + + + + parallel-tests + + + parallel-tests + + + + + + maven-antrun-plugin + + + create-parallel-tests-dirs + test-compile + + + + + + + run + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + default-test + + test + + + 1 + ${testsThreadCount} + false + ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true + ${fs.azure.scale.test.timeout} + + 
${test.build.data}/${surefire.forkNumber} + ${test.build.dir}/${surefire.forkNumber} + ${hadoop.tmp.dir}/${surefire.forkNumber} + fork-${surefire.forkNumber} + ${fs.azure.scale.test.enabled} + ${fs.azure.scale.test.huge.filesize} + ${fs.azure.scale.test.huge.partitionsize} + ${fs.azure.scale.test.timeout} + + + **/Test*.java + + + **/TestRollingWindowAverage*.java + + + + + serialized-test + + test + + + 1 + false + ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true + ${fs.azure.scale.test.timeout} + + ${test.build.data}/${surefire.forkNumber} + ${test.build.dir}/${surefire.forkNumber} + ${hadoop.tmp.dir}/${surefire.forkNumber} + fork-${surefire.forkNumber} + ${fs.azure.scale.test.enabled} + ${fs.azure.scale.test.huge.filesize} + ${fs.azure.scale.test.huge.partitionsize} + ${fs.azure.scale.test.timeout} + + + **/TestRollingWindowAverage*.java + + + + + + + org.apache.maven.plugins + maven-failsafe-plugin + + + default-integration-test + + integration-test + verify + + + ${testsThreadCount} + false + ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true + ${fs.azure.scale.test.timeout} + + + true + ${test.build.data}/${surefire.forkNumber} + ${test.build.dir}/${surefire.forkNumber} + ${hadoop.tmp.dir}/${surefire.forkNumber} + + + + + + fork-${surefire.forkNumber} + + ${fs.azure.scale.test.enabled} + ${fs.azure.scale.test.huge.filesize} + ${fs.azure.scale.test.huge.partitionsize} + ${fs.azure.scale.test.timeout} + + + + + + + + + + + + + + **/ITest*.java + + + **/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java + **/ITestFileSystemOperationsWithThreads.java + **/ITestOutOfBandAzureBlobOperationsLive.java + **/ITestNativeAzureFileSystemAuthorizationWithOwner.java + **/ITestNativeAzureFileSystemConcurrencyLive.java + **/ITestNativeAzureFileSystemLive.java + **/ITestNativeAzureFSPageBlobLive.java + **/ITestWasbRemoteCallHelper.java + **/ITestBlockBlobInputStream.java + + + + + + + sequential-integration-tests + + integration-test + verify + + + ${fs.azure.scale.test.timeout} + + false + ${fs.azure.scale.test.enabled} + ${fs.azure.scale.test.huge.filesize} + ${fs.azure.scale.test.huge.partitionsize} + ${fs.azure.scale.test.timeout} + + + **/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java + **/ITestFileSystemOperationsWithThreads.java + **/ITestOutOfBandAzureBlobOperationsLive.java + **/ITestNativeAzureFileSystemAuthorizationWithOwner.java + **/ITestNativeAzureFileSystemConcurrencyLive.java + **/ITestNativeAzureFileSystemLive.java + **/ITestWasbRemoteCallHelper.java + **/ITestBlockBlobInputStream.java + + + + + + + + + + sequential-tests + + + !parallel-tests + + + + + + org.apache.maven.plugins + maven-failsafe-plugin + + + + integration-test + verify + + + + + ${fs.azure.scale.test.enabled} + ${fs.azure.scale.test.huge.filesize} + ${fs.azure.scale.test.timeout} + + ${fs.azure.scale.test.timeout} + + + + + + + + + + + scale + + + scale + + + + true + + + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java index 639862fa859..f1031b43843 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java @@ -346,7 +346,7 @@ public class AzureNativeFileSystemStore implements NativeFileSystemStore { private String delegationToken; /** The error 
message template when container is not accessible. */ - static final String NO_ACCESS_TO_CONTAINER_MSG = "No credentials found for " + public static final String NO_ACCESS_TO_CONTAINER_MSG = "No credentials found for " + "account %s in the configuration, and its container %s is not " + "accessible using anonymous credentials. Please check if the container " + "exists first. If it is not publicly available, you have to provide " diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/index.md b/hadoop-tools/hadoop-azure/src/site/markdown/index.md index 466bf0b7895..876d7cc9a61 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/index.md @@ -519,96 +519,8 @@ The maximum number of entries that that cache can hold can be customized using t true ``` -## Testing the hadoop-azure Module -The hadoop-azure module includes a full suite of unit tests. Most of the tests -will run without additional configuration by running `mvn test`. This includes -tests against mocked storage, which is an in-memory emulation of Azure Storage. - -A selection of tests can run against the -[Azure Storage Emulator](http://msdn.microsoft.com/en-us/library/azure/hh403989.aspx) -which is a high-fidelity emulation of live Azure Storage. The emulator is -sufficient for high-confidence testing. The emulator is a Windows executable -that runs on a local machine. - -To use the emulator, install Azure SDK 2.3 and start the storage emulator. Then, -edit `src/test/resources/azure-test.xml` and add the following property: - -```xml - - fs.azure.test.emulator - true - -``` - -There is a known issue when running tests with the emulator. You may see the -following failure message: - - com.microsoft.windowsazure.storage.StorageException: The value for one of the HTTP headers is not in the correct format. - -To resolve this, restart the Azure Emulator. Ensure it v3.2 or later. - -It's also possible to run tests against a live Azure Storage account by saving a -file to `src/test/resources/azure-auth-keys.xml` and setting -the name of the storage account and its access key. - -For example: - -```xml - - - - - fs.azure.test.account.name - {ACCOUNTNAME}.blob.core.windows.net - - - fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net - {ACCOUNT ACCESS KEY} - - -``` - -To run contract tests, set the WASB file system URI in `src/test/resources/azure-auth-keys.xml` -and the account access key. For example: - -```xml - - - - - fs.contract.test.fs.wasb - wasb://{CONTAINERNAME}@{ACCOUNTNAME}.blob.core.windows.net - The name of the azure file system for testing. - - - fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net - {ACCOUNT ACCESS KEY} - - -``` - -Overall, to run all the tests using `mvn test`, a sample `azure-auth-keys.xml` is like following: - -```xml - - - - - fs.azure.test.account.name - {ACCOUNTNAME}.blob.core.windows.net - - - fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net - {ACCOUNT ACCESS KEY} - - - fs.contract.test.fs.wasb - wasb://{CONTAINERNAME}@{ACCOUNTNAME}.blob.core.windows.net - - -``` - -DO NOT ADD `azure-auth-keys.xml` TO REVISION CONTROL. The keys to your Azure -Storage account are a secret and must not be shared. +## Further Reading +* [Testing the Azure WASB client](testing_azure.html). 
+* MSDN article, [Understanding Block Blobs, Append Blobs, and Page Blobs](https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs) diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md new file mode 100644 index 00000000000..b58e68be5f3 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md @@ -0,0 +1,576 @@ + + +# Testing the Azure WASB client + + + +This module includes both unit tests, which can run in isolation without +connecting to the Azure Storage service, and integration tests, which require a working +connection to interact with a container. Unit test suites follow the naming +convention `Test*.java`. Integration tests follow the naming convention +`ITest*.java`. + +## Policy for submitting patches which affect the `hadoop-azure` module. +
+The Apache Jenkins infrastructure does not run any cloud integration tests, +due to the need to keep credentials secure. + +### The submitter of any patch is required to run all the integration tests and declare which Azure region they used. + +This is important: **patches which do not include this declaration will be ignored** + +This policy has proven to be the only mechanism to guarantee full regression +testing of code changes. Why the declaration of region? Two reasons: + +1. It helps us identify regressions which only surface against specific endpoints. +1. It forces the submitters to be more honest about their testing. It's easy +to lie, "yes, I tested this". To say "yes, I tested this against Azure US-west" +is a more specific lie and harder to make. And, if you get caught out: you +lose all credibility with the project. + +You don't need to test from a VM within the Azure infrastructure, all you need +are credentials. + +It's neither hard nor expensive to run the tests; if you can't, +there's no guarantee your patch works. The reviewers have enough to do, and +don't have the time to do these tests, especially as every failure will simply +make for slow iterative development. + +Please: run the tests. And if you don't, we are sorry for declining your +patch, but we have to. + + +### What if there's an intermittent failure of a test? + +Some of the tests do fail intermittently, especially in parallel runs. +If this happens, try to run the test on its own to see if the test succeeds. + +If it still fails, include this fact in your declaration. We know some tests +are intermittently unreliable. + +### What if the tests are timing out or failing over my network connection? + +The tests are designed to be configurable for different +timeouts. If you are seeing problems and this configuration isn't working, +that's a sign that the configuration mechanism isn't complete. If it's happening +in the production code, that could be a sign of a problem which may surface +over long-haul connections. Please help us identify and fix these problems +— especially as you are the one best placed to verify the fixes work. + +## Setting up the tests + +## Testing the `hadoop-azure` Module + +The `hadoop-azure` module includes a full suite of unit tests. Many of the tests +will run without additional configuration by running `mvn test`. This includes +tests against mocked storage, which is an in-memory emulation of Azure Storage. + +The integration tests are designed to test directly against an Azure storage +service, and require an account and credentials in order to run.
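One note on the timeout point above: the `hadoop-azure` POM in this patch passes a `fs.azure.scale.test.timeout` property (in seconds; the POM above appears to default it to 7200) down to the test JVMs, so a longer limit can be supplied on the Maven command line when testing over a slow link. A minimal sketch, assuming that property keeps the meaning given to it in the POM:

```bash
# Hypothetical run: allow the scale/integration tests up to four hours
# when working over a slow long-haul connection.
mvn -T 1C -Dparallel-tests -Dscale -Dfs.azure.scale.test.timeout=14400 clean verify
```

The account and credentials themselves are kept in a local configuration file which stays out of revision control.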
+ +This is done by creating the file to `src/test/resources/azure-auth-keys.xml` +and setting the name of the storage account and its access key. + +For example: + +```xml + + + + + fs.azure.test.account.name + {ACCOUNTNAME}.blob.core.windows.net + + + fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net + {ACCOUNT ACCESS KEY} + + +``` + +To run contract tests, set the WASB file system URI in `src/test/resources/azure-auth-keys.xml` +and the account access key. For example: + +```xml + + + + + fs.contract.test.fs.wasb + wasb://{CONTAINERNAME}@{ACCOUNTNAME}.blob.core.windows.net + The name of the azure file system for testing. + + + fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net + {ACCOUNT ACCESS KEY} + + +``` + +Overall, to run all the tests using `mvn test`, a sample `azure-auth-keys.xml` is like following: + +```xml + + + + + fs.azure.test.account.name + {ACCOUNTNAME}.blob.core.windows.net + + + fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net + {ACCOUNT ACCESS KEY} + + + fs.contract.test.fs.wasb + wasb://{CONTAINERNAME}@{ACCOUNTNAME}.blob.core.windows.net + + +``` + +DO NOT ADD `azure-auth-keys.xml` TO REVISION CONTROL. The keys to your Azure +Storage account are a secret and must not be shared. + + +## Running the Tests + +After completing the configuration, execute the test run through Maven. + +```bash +mvn -T 1C clean verify +``` + +It's also possible to execute multiple test suites in parallel by passing the +`parallel-tests` property on the command line. The tests spend most of their +time blocked on network I/O, so running in parallel tends to +complete full test runs faster. + +```bash +mvn -T 1C -Dparallel-tests clean verify +``` + +Some tests must run with exclusive access to the storage container, so even with the +`parallel-tests` property, several test suites will run in serial in a separate +Maven execution step after the parallel tests. + +By default, `parallel-tests` runs 4 test suites concurrently. This can be tuned +by passing the `testsThreadCount` property. + +```bash +mvn -T 1C -Dparallel-tests -DtestsThreadCount=8 clean verify +``` + + + +```bash +mvn -T 1C clean test + +mvn -T 1C -Dparallel-tests clean test + +mvn -T 1C -Dparallel-tests -DtestsThreadCount=8 clean test +``` + +To run only a specific named subset of tests, pass the `test` property for unit +tests or the `it.test` property for integration tests. + +```bash +mvn -T 1C clean test -Dtest=TestRollingWindowAverage + +mvn -T 1C clean verify -Dscale -Dit.test=ITestFileSystemOperationExceptionMessage -Dtest=none + +mvn -T 1C clean verify -Dtest=none -Dit.test=ITest* + +``` + +Note + +1. When running a specific subset of tests, the patterns passed in `test` +and `it.test` override the configuration of which tests need to run in isolation +in a separate serial phase (mentioned above). This can cause unpredictable +results, so the recommendation is to avoid passing `parallel-tests` in +combination with `test` or `it.test`. If you know that you are specifying only +tests that can run safely in parallel, then it will work. For wide patterns, +like `ITest*` shown above, it may cause unpredictable test failures. + +2. The command line shell may try to expand the "*" and sometimes the "#" symbols +in test patterns. In such situations, escape the character it with a "\\" prefix. +Example: + + mvn -T 1C clean verify -Dtest=none -Dit.test=ITest\* + + +## Viewing the results + +Integration test results and logs are stored in `target/failsafe-reports/`. 
+An HTML report can be generated during site generation, or with the `surefire-report` +plugin: + +```bash + +# for the unit tests +mvn -T 1C surefire-report:report-only + +# for the integration tests +mvn -T 1C surefire-report:failsafe-report-only + +# all reports for this module +mvn -T 1C site:site +``` + +## Scale Tests + +There are a set of tests designed to measure the scalability and performance +at scale of the filesystem client, *Scale Tests*. Tests include: creating +and traversing directory trees, uploading large files, renaming them, +deleting them, seeking through the files, performing random IO, and others. +This makes them a foundational part of the benchmarking. + +By their very nature they are slow. And, as their execution time is often +limited by bandwidth between the computer running the tests and the Azure endpoint, +parallel execution does not speed these tests up. + +### Enabling the Scale Tests + +The tests are enabled if the `scale` property is set in the maven build; +this can be done regardless of whether or not the parallel test profile +is used. + +```bash +mvn -T 1C verify -Dscale + +mvn -T 1C verify -Dparallel-tests -Dscale -DtestsThreadCount=8 +``` + +The most bandwidth intensive tests (those which upload data) always run +sequentially; those which are slow due to HTTPS setup costs or server-side +actions are included in the set of parallelized tests. + + +### Scale test tuning options + + +Some of the tests can be tuned from the maven build or from the +configuration file used to run the tests. + +```bash +mvn -T 1C verify -Dparallel-tests -Dscale -DtestsThreadCount=8 -Dfs.azure.scale.test.huge.filesize=128M +``` + +The algorithm is: + +1. The value is queried from the configuration file, using a default value if +it is not set. +1. The value is queried from the JVM System Properties, where it is passed +down by maven. +1. If the system property is null, an empty string, or it has the value `unset`, +then the configuration value is used. The `unset` option is used to +[work round a quirk in maven property propagation](http://stackoverflow.com/questions/7773134/null-versus-empty-arguments-in-maven). + +Only a few properties can be set this way; more will be added. + +| Property | Meaning | +|-----------|-------------| +| `fs.azure.scale.test.huge.filesize`| Size for huge file uploads | +| `fs.azure.scale.test.huge.partitionsize`| Size for partitions in huge file uploads | + +The file and partition sizes are numeric values with a k/m/g/t/p suffix depending +on the desired size. For example: 128M, 128m, 2G, 2G, 4T or even 1P. + +#### Scale test configuration options + +Some scale tests perform multiple operations (such as creating many directories). + +The exact number of operations to perform is configurable in the option +`scale.test.operation.count` + +```xml + + scale.test.operation.count + 10 + +``` + +Larger values generate more load, and are recommended when testing locally, +or in batch runs. + +Smaller values result in faster test runs, especially when the object +store is a long way away. + +Operations which work on directories have a separate option: this controls +the width and depth of tests creating recursive directories. Larger +values create exponentially more directories, with consequent performance +impact. + +```xml + + scale.test.directory.count + 2 + +``` + +DistCp tests targeting Azure support a configurable file size.
+DistCp tests targeting Azure support a configurable file size. The default is
+10 MB, but the configuration value is expressed in KB so that it can be tuned
+smaller to achieve faster test runs.
+
+```xml
+<property>
+  <name>scale.test.distcp.file.size.kb</name>
+  <value>10240</value>
+</property>
+```
+
+Azure-specific scale test properties are:
+
+##### `fs.azure.scale.test.huge.filesize`: size in MB for "Huge file tests".
+
+The Huge File tests validate Azure storage's ability to handle large files; the
+property `fs.azure.scale.test.huge.filesize` declares the file size to use.
+
+```xml
+<property>
+  <name>fs.azure.scale.test.huge.filesize</name>
+  <value>200M</value>
+</property>
+```
+
+Tests at this scale are slow: they are best executed from hosts running in
+the cloud infrastructure where the storage endpoint is based.
+
+## Using the emulator
+
+A selection of tests can run against the
+[Azure Storage Emulator](http://msdn.microsoft.com/en-us/library/azure/hh403989.aspx),
+which is a high-fidelity emulation of live Azure Storage. The emulator is
+sufficient for high-confidence testing. The emulator is a Windows executable
+that runs on a local machine.
+
+To use the emulator, install Azure SDK 2.3 and start the storage emulator. Then,
+edit `src/test/resources/azure-test.xml` and add the following property:
+
+```xml
+<property>
+  <name>fs.azure.test.emulator</name>
+  <value>true</value>
+</property>
+```
+
+There is a known issue when running tests with the emulator. You may see the
+following failure message:
+
+    com.microsoft.windowsazure.storage.StorageException: The value for one of the HTTP headers is not in the correct format.
+
+To resolve this, restart the Azure Emulator. Ensure it is v3.2 or later.
+
+
+## Debugging Test failures
+
+Logging at debug level is the standard way to provide more diagnostics output;
+after setting this, rerun the tests:
+
+```properties
+log4j.logger.org.apache.hadoop.fs.azure=DEBUG
+```
+
+## Adding new tests
+
+New tests are always welcome. Bear in mind that we need to keep costs
+and test time down, which is done by:
+
+* Not duplicating tests.
+* Being efficient in your use of Hadoop API calls.
+* Isolating large/slow tests into the "scale" test group.
+* Designing all tests to execute in parallel (where possible).
+* Adding new probes and predicates into existing tests, albeit carefully.
+
+*No duplication*: if an operation is tested elsewhere, don't repeat it. This
+applies as much for metadata operations as it does for bulk IO. If a new
+test case is added which completely obsoletes an existing test, it is OK
+to cut the previous one, after showing that coverage is not worsened.
+
+*Efficient*: prefer calling `getFileStatus()` and examining the results, rather
+than calls to `exists()`, `isFile()`, etc.
+
+*Fail with useful information*: provide as much diagnostics as possible
+on a failure. Using `org.apache.hadoop.fs.contract.ContractTestUtils` to make
+assertions about the state of a filesystem helps here.
+
+*Isolating Scale tests*. Any test doing large amounts of IO MUST extend the
+class `AbstractAzureScaleTest`, so that it only runs if `scale` is defined on a
+build and supports test timeouts configurable by the user. Scale tests should
+also support configurability as to the actual size of objects/number of
+operations, so that behavior at different scale can be verified.
+
+*Designed for parallel execution*. A key need here is for each test suite to work
+on isolated parts of the filesystem. Subclasses of `AbstractWasbTestBase`
+SHOULD use the `path()`, `methodPath()` and `blobPath()` methods
+to build isolated paths. Tests MUST NOT assume that they have exclusive access
+to a container.
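+
+A rough sketch of that pattern, using the helpers added to
+`AbstractWasbTestBase` in this patch (the test class and file names below are
+made up purely for illustration):
+
+```java
+import org.junit.Test;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.azure.AbstractWasbTestBase;
+import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+
+/** Illustrative only: every path used lives under methodPath(). */
+public class ITestExampleIsolation extends AbstractWasbTestBase {
+
+  @Override
+  protected AzureBlobStorageTestAccount createTestAccount() throws Exception {
+    return AzureBlobStorageTestAccount.create();
+  }
+
+  @Test
+  public void testCreateFileUnderMethodPath() throws Exception {
+    describe("create a file under a path unique to this fork and method");
+    // a path unique to (fork, test method), so safe in parallel runs
+    Path testFile = new Path(methodPath(), "example.dat");
+    // only touch paths under methodPath(); never assume exclusive
+    // ownership of the container
+    ContractTestUtils.touch(getFileSystem(), testFile);
+  }
+}
+```
+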
+*Extending existing tests where appropriate*. This recommendation goes
+against the normal testing best practice of "test one thing per method".
+Because it is so slow to create directory trees or upload large files, we do
+not have that luxury. All the tests against real endpoints are integration
+tests where sharing test setup and teardown saves time and money.
+
+A standard way to do this is to extend existing tests with some extra predicates,
+rather than write new tests. When doing this, make sure that the new predicates
+fail with meaningful diagnostics, so any new problems can be easily debugged
+from test logs.
+
+
+### Requirements of new Tests
+
+This is what we expect from new tests; they're an extension of the normal
+Hadoop requirements, based on the need to work with remote servers whose
+use requires the presence of secret credentials, where tests may be slow,
+and where finding out why something failed from nothing but the test output
+is critical.
+
+#### Subclasses Existing Shared Base Classes
+
+There are a set of base classes which should be extended for Azure tests and
+integration tests.
+
+##### `org.apache.hadoop.fs.azure.AbstractWasbTestWithTimeout`
+
+This extends the JUnit `Assert` class with thread names and timeouts, with the
+default timeout set by `AzureTestConstants.AZURE_TEST_TIMEOUT` to ten minutes.
+The thread names are set to aid analyzing the stack trace of a test: a `jstack`
+call can then show which test case each thread belongs to.
+
+##### `org.apache.hadoop.fs.azure.AbstractWasbTestBase`
+
+The base class for tests which use `AzureBlobStorageTestAccount` to create
+mock or live Azure clients; in test teardown it tries to clean up store state.
+
+1. This class requires subclasses to implement `createTestAccount()` to create
+a mock or real test account.
+
+1. The configuration used to create a test account *should* be that from
+`createConfiguration()`; this can be extended in subclasses to tune the settings.
+
+##### `org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest`
+
+This extends `AbstractWasbTestBase` for scale tests, that is, tests which
+only run when `-Dscale` is used to select the "scale" profile.
+These tests have a timeout of 30 minutes, so as to support slow test runs.
+
+Having shared base classes helps reduce future maintenance. Please
+use them.
+
+#### Secure
+
+Don't ever log credentials. The credential tests go out of their way to
+not provide meaningful logs or assertion messages precisely to avoid this.
+
+#### Efficient in Time and Money
+
+This means efficient in test setup/teardown, and, ideally, making use of
+existing public datasets to save setup time and tester cost.
+
+The reference example is `ITestAzureHugeFiles`: it marks the test suite as
+`@FixMethodOrder(MethodSorters.NAME_ASCENDING)`, then orders the test cases such
+that each test case expects the previous test to have completed (here: uploaded a
+file, renamed a file, ...). This provides for independent tests in the reports,
+yet still permits an ordered sequence of operations. Note the use of JUnit
+`Assume` checks to detect when the preconditions for a single test case are not
+met, so that the tests are skipped rather than failing with a stack trace which
+is really a false alarm.
+
+#### Works Over Long-haul Links
+
+As well as making file size and operation counts scalable, this includes
+making test timeouts adequate. The Scale tests make this configurable; it's
+hard coded to ten minutes in `AbstractAzureIntegrationTest()`; subclasses can
+change this by overriding `getTestTimeoutMillis()`.
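+
+A minimal sketch of such an override, assuming the base classes added in this
+patch (the subclass name and the one-hour value are illustrative, not part of
+the patch):
+
+```java
+import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount;
+import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest;
+
+/**
+ * Illustrative only: a scale test which extends its timeout so it can
+ * complete over a slow, long-haul link.
+ */
+public class ITestExampleSlowLink extends AbstractAzureScaleTest {
+
+  @Override
+  protected AzureBlobStorageTestAccount createTestAccount() throws Exception {
+    return AzureBlobStorageTestAccount.create();
+  }
+
+  @Override
+  protected int getTestTimeoutMillis() {
+    // one hour instead of the default scale-test timeout
+    return 60 * 60 * 1000;
+  }
+}
+```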
+
+Equally importantly: support proxies, as some testers need them.
+
+
+#### Provides Diagnostics and timing information
+
+1. Create logs, log things.
+1. You can use `AbstractWasbTestBase.describe(format-string, args)` here; it
+adds some newlines so as to be easier to spot.
+1. Use `ContractTestUtils.NanoTimer` to measure the duration of operations,
+and log the output.
+
+#### Fails Meaningfully
+
+The `ContractTestUtils` class contains a whole set of assertions for making
+statements about the expected state of a filesystem, e.g.
+`assertPathExists(FS, path)`, `assertPathDoesNotExist(FS, path)`, and others.
+These do their best to provide meaningful diagnostics on failures (e.g. directory
+listings, file status, ...), so help make failures easier to understand.
+
+At the very least, *do not use `assertTrue()` or `assertFalse()` without
+including error messages*.
+
+#### Cleans Up Afterwards
+
+Keeps costs down.
+
+1. Do not clean up only when a test case completes successfully; test suite
+teardown must do it as well.
+1. That teardown code must check for the filesystem and other fields being
+null before the cleanup. Why? If test setup fails, the teardown methods still
+get called.
+
+#### Works Reliably
+
+We really appreciate this; you will too.
+
+
+## Tips
+
+### How to keep your credentials really safe
+
+Although the `auth-keys.xml` file is marked as ignored in git and subversion,
+it is still in your source tree, and there's always that risk that it may
+creep out.
+
+You can avoid this by keeping your keys outside the source tree and
+using an absolute XInclude reference to them. The path below is a placeholder;
+use whatever absolute path the keys are actually stored at.
+
+```xml
+<configuration>
+
+  <include xmlns="http://www.w3.org/2001/XInclude"
+    href="/path/to/azure-auth-keys.xml" />
+
+</configuration>
+```
+
+### Cleaning up Containers
+
+The Azure tests create containers with the prefix `"wasbtests-"` and delete
+them after the test runs. If a test run is interrupted, these containers
+may not get deleted. There is a special test case which can be manually invoked
+to list and delete these, `CleanupTestContainers`:
+
+```bash
+mvn test -Dtest=CleanupTestContainers
+```
+
+This will delete the containers; the output log of the test run will
+provide the details and summary of the operation.
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestBase.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestBase.java
index d04a19ca3e3..0d3a06c36f2 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestBase.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestBase.java
@@ -18,15 +18,21 @@
 package org.apache.hadoop.fs.azure;
 
-import static org.junit.Assume.assumeNotNull;
+import java.io.IOException;
 
-import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.conf.Configuration;
 import org.junit.After;
 import org.junit.Before;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.azure.integration.AzureTestConstants;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.junit.Assume.assumeNotNull;
+import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.*;
+
 /**
  * Abstract test class that provides basic setup and teardown of testing Azure
  * Storage account. Each subclass defines a different set of test cases to run
@@ -34,41 +40,137 @@
  * to run those tests. The returned account might integrate with Azure Storage
  * directly or it might be a mock implementation.
*/ -public abstract class AbstractWasbTestBase { +public abstract class AbstractWasbTestBase extends AbstractWasbTestWithTimeout + implements AzureTestConstants { protected static final Logger LOG = LoggerFactory.getLogger(AbstractWasbTestBase.class); - @VisibleForTesting protected NativeAzureFileSystem fs; - private AzureBlobStorageTestAccount testAccount; + protected AzureBlobStorageTestAccount testAccount; @Before public void setUp() throws Exception { - testAccount = createTestAccount(); - if (testAccount != null) { - fs = testAccount.getFileSystem(); - } - assumeNotNull(testAccount); + AzureBlobStorageTestAccount account = createTestAccount(); + assumeNotNull(account); + bindToTestAccount(account); } @After public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - fs = null; - } + describe("closing test account and filesystem"); + testAccount = cleanupTestAccount(testAccount); + IOUtils.closeStream(fs); + fs = null; } - public Configuration getConfiguration() { - return new Configuration(); + /** + * Create the configuration to use when creating a test account. + * Subclasses can override this to tune the test account configuration. + * @return a configuration. + */ + public Configuration createConfiguration() { + return AzureBlobStorageTestAccount.createTestConfiguration(); } + /** + * Create the test account. + * Subclasses must implement this. + * @return the test account. + * @throws Exception + */ protected abstract AzureBlobStorageTestAccount createTestAccount() throws Exception; + /** + * Get the test account. + * @return the current test account. + */ protected AzureBlobStorageTestAccount getTestAccount() { return testAccount; } + + /** + * Get the filesystem + * @return the current filesystem. + */ + protected NativeAzureFileSystem getFileSystem() { + return fs; + } + + /** + * Get the configuration used to create the filesystem + * @return the configuration of the test FS + */ + protected Configuration getConfiguration() { + return getFileSystem().getConf(); + } + + /** + * Bind to a new test account; closing any existing one. + * This updates the test account returned in {@link #getTestAccount()} + * and the filesystem in {@link #getFileSystem()}. + * @param account new test account + */ + protected void bindToTestAccount(AzureBlobStorageTestAccount account) { + // clean any existing test account + cleanupTestAccount(testAccount); + IOUtils.closeStream(fs); + testAccount = account; + if (testAccount != null) { + fs = testAccount.getFileSystem(); + } + } + + /** + * Return a path to a blob which will be unique for this fork. + * @param filepath filepath + * @return a path under the default blob directory + * @throws IOException + */ + protected Path blobPath(String filepath) throws IOException { + return blobPathForTests(getFileSystem(), filepath); + } + + /** + * Create a path under the test path provided by + * the FS contract. + * @param filepath path string in + * @return a path qualified by the test filesystem + * @throws IOException IO problems + */ + protected Path path(String filepath) throws IOException { + return pathForTests(getFileSystem(), filepath); + } + + /** + * Return a path bonded to this method name, unique to this fork during + * parallel execution. + * @return a method name unique to (fork, method). 
+ * @throws IOException IO problems + */ + protected Path methodPath() throws IOException { + return path(methodName.getMethodName()); + } + + /** + * Return a blob path bonded to this method name, unique to this fork during + * parallel execution. + * @return a method name unique to (fork, method). + * @throws IOException IO problems + */ + protected Path methodBlobPath() throws IOException { + return blobPath(methodName.getMethodName()); + } + + /** + * Describe a test in the logs. + * @param text text to print + * @param args arguments to format in the printing + */ + protected void describe(String text, Object... args) { + LOG.info("\n\n{}: {}\n", + methodName.getMethodName(), + String.format(text, args)); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestWithTimeout.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestWithTimeout.java new file mode 100644 index 00000000000..b7076a41ba2 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestWithTimeout.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.rules.TestName; +import org.junit.rules.Timeout; + +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; + +/** + * Base class for any Wasb test with timeouts & named threads. + * This class does not attempt to bind to Azure. + */ +public class AbstractWasbTestWithTimeout extends Assert { + + /** + * The name of the current method. + */ + @Rule + public TestName methodName = new TestName(); + /** + * Set the timeout for every test. + * This is driven by the value returned by {@link #getTestTimeoutMillis()}. + */ + @Rule + public Timeout testTimeout = new Timeout(getTestTimeoutMillis()); + + /** + * Name the junit thread for the class. This will overridden + * before the individual test methods are run. + */ + @BeforeClass + public static void nameTestThread() { + Thread.currentThread().setName("JUnit"); + } + + /** + * Name the thread to the current test method. + */ + @Before + public void nameThread() { + Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); + } + + /** + * Override point: the test timeout in milliseconds. 
+ * @return a timeout in milliseconds + */ + protected int getTestTimeoutMillis() { + return AzureTestConstants.AZURE_TEST_TIMEOUT; + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java index 7fa59ce9427..5b36c8793ca 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java @@ -21,12 +21,15 @@ import com.microsoft.azure.storage.*; import com.microsoft.azure.storage.blob.*; import com.microsoft.azure.storage.core.Base64; -import org.apache.commons.configuration2.SubsetConfiguration; +import org.junit.Assert; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.configuration2.SubsetConfiguration; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; import org.apache.hadoop.fs.azure.metrics.AzureFileSystemMetricsSystem; import org.apache.hadoop.metrics2.AbstractMetric; @@ -35,6 +38,8 @@ import org.apache.hadoop.metrics2.MetricsTag; import org.apache.hadoop.metrics2.impl.TestMetricsConfig; +import java.io.File; +import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.*; @@ -46,10 +51,10 @@ /** * Helper class to create WASB file systems backed by either a mock in-memory - * implementation or a real Azure Storage account. See RunningLiveWasbTests.txt - * for instructions on how to connect to a real Azure Storage account. + * implementation or a real Azure Storage account. 
*/ -public final class AzureBlobStorageTestAccount { +public final class AzureBlobStorageTestAccount implements AutoCloseable, + AzureTestConstants { private static final Logger LOG = LoggerFactory.getLogger( AzureBlobStorageTestAccount.class); @@ -166,6 +171,7 @@ public static Path pageBlobPath() { return new Path("/" + DEFAULT_PAGE_BLOB_DIRECTORY); } + @Deprecated public static Path pageBlobPath(String fileName) { return new Path(pageBlobPath(), fileName); } @@ -201,6 +207,9 @@ public Number getLatestMetricValue(String metricName, Number defaultValue) * @return */ private boolean wasGeneratedByMe(MetricsRecord currentRecord) { + Assert.assertNotNull("null filesystem", fs); + Assert.assertNotNull("null filesystemn instance ID", + fs.getInstrumentation().getFileSystemInstanceId()); String myFsId = fs.getInstrumentation().getFileSystemInstanceId().toString(); for (MetricsTag currentTag : currentRecord.tags()) { if (currentTag.name().equalsIgnoreCase("wasbFileSystemId")) { @@ -247,13 +256,16 @@ public void releaseLease(String leaseID, String blobKey) throws Exception { getBlobReference(blobKey).releaseLease(accessCondition); } - private static void saveMetricsConfigFile() { + private static void saveMetricsConfigFile() throws IOException { if (!metricsConfigSaved) { + String testFilename = TestMetricsConfig.getTestFilename( + "hadoop-metrics2-azure-file-system"); + File dest = new File(testFilename).getCanonicalFile(); + dest.getParentFile().mkdirs(); new org.apache.hadoop.metrics2.impl.ConfigBuilder() .add("azure-file-system.sink.azuretestcollector.class", StandardCollector.class.getName()) - .save(TestMetricsConfig.getTestFilename( - "hadoop-metrics2-azure-file-system.properties")); + .save(testFilename); metricsConfigSaved = true; } } @@ -314,9 +326,8 @@ public static AzureBlobStorageTestAccount createForEmulator() Configuration conf = createTestConfiguration(); if (!conf.getBoolean(USE_EMULATOR_PROPERTY_NAME, false)) { // Not configured to test against the storage emulator. - LOG.warn("Skipping emulator Azure test because configuration doesn't " - + "indicate that it's running. Please see RunningLiveWasbTests.txt " - + "for guidance."); + LOG.warn("Skipping emulator Azure test because configuration " + + "doesn't indicate that it's running."); return null; } CloudStorageAccount account = @@ -482,8 +493,7 @@ static CloudStorageAccount createStorageAccount(String accountName, credentials = StorageCredentialsAnonymous.ANONYMOUS; } else { LOG.warn("Skipping live Azure test because of missing key for" - + " account '" + accountName + "'. " - + "Please see RunningLiveWasbTests.txt for guidance."); + + " account '" + accountName + "'."); return null; } } else { @@ -517,8 +527,7 @@ static CloudStorageAccount createTestAccount(Configuration conf) throws URISyntaxException, KeyProviderException { String testAccountName = conf.get(TEST_ACCOUNT_NAME_PROPERTY_NAME); if (testAccountName == null) { - LOG.warn("Skipping live Azure test because of missing test account. 
" - + "Please see RunningLiveWasbTests.txt for guidance."); + LOG.warn("Skipping live Azure test because of missing test account"); return null; } return createStorageAccount(testAccountName, conf, false); @@ -863,6 +872,11 @@ public void cleanup() throws Exception { } } + @Override + public void close() throws Exception { + cleanup(); + } + public NativeAzureFileSystem getFileSystem() { return fs; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureConcurrentOutOfBandIo.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIo.java similarity index 76% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureConcurrentOutOfBandIo.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIo.java index a10a3666303..7e733dcf3e4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureConcurrentOutOfBandIo.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIo.java @@ -18,21 +18,26 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; -import static org.junit.Assume.assumeNotNull; - -import java.io.*; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.Arrays; -import org.apache.hadoop.fs.azure.AzureException; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -public class TestAzureConcurrentOutOfBandIo { +/** + * Handle OOB IO into a shared container. + */ +public class ITestAzureConcurrentOutOfBandIo extends AbstractWasbTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestAzureConcurrentOutOfBandIo.class); // Class constants. static final int DOWNLOAD_BLOCK_SIZE = 8 * 1024 * 1024; @@ -42,22 +47,10 @@ public class TestAzureConcurrentOutOfBandIo { // Number of blocks to be written before flush. static final int NUMBER_OF_BLOCKS = 2; - protected AzureBlobStorageTestAccount testAccount; - - // Overridden TestCase methods. - @Before - public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.createOutOfBandStore( + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.createOutOfBandStore( UPLOAD_BLOCK_SIZE, DOWNLOAD_BLOCK_SIZE); - assumeNotNull(testAccount); - } - - @After - public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - } } class DataBlockWriter implements Runnable { @@ -119,13 +112,11 @@ public void run() { outputStream.close(); } } catch (AzureException e) { - System.out - .println("DatablockWriter thread encountered a storage exception." - + e.getMessage()); + LOG.error("DatablockWriter thread encountered a storage exception." + + e.getMessage(), e); } catch (IOException e) { - System.out - .println("DatablockWriter thread encountered an I/O exception." - + e.getMessage()); + LOG.error("DatablockWriter thread encountered an I/O exception." 
+ + e.getMessage(), e); } } } @@ -140,10 +131,11 @@ public void testReadOOBWrites() throws Exception { // // Write five 4 MB blocks to the blob. To ensure there is data in the blob before // reading. This eliminates the race between the reader and writer threads. - OutputStream outputStream = testAccount.getStore().storefile( - "WASB_String.txt", + String key = "WASB_String" + AzureTestUtils.getForkID() + ".txt"; + OutputStream outputStream = testAccount.getStore().storefile( + key, new PermissionStatus("", "", FsPermission.getDefault()), - "WASB_String.txt"); + key); Arrays.fill(dataBlockWrite, (byte) 255); for (int i = 0; i < NUMBER_OF_BLOCKS; i++) { outputStream.write(dataBlockWrite); @@ -153,15 +145,12 @@ public void testReadOOBWrites() throws Exception { outputStream.close(); // Start writing blocks to Azure store using the DataBlockWriter thread. - DataBlockWriter writeBlockTask = new DataBlockWriter(testAccount, - "WASB_String.txt"); - writeBlockTask.startWriting(); + DataBlockWriter writeBlockTask = new DataBlockWriter(testAccount, key); + writeBlockTask.startWriting(); int count = 0; - InputStream inputStream = null; for (int i = 0; i < 5; i++) { - try { - inputStream = testAccount.getStore().retrieve("WASB_String.txt"); + try(InputStream inputStream = testAccount.getStore().retrieve(key)) { count = 0; int c = 0; @@ -179,11 +168,6 @@ public void testReadOOBWrites() throws Exception { e.printStackTrace(); fail(); } - - // Close the stream. - if (null != inputStream){ - inputStream.close(); - } } // Stop writing blocks. @@ -192,4 +176,4 @@ public void testReadOOBWrites() throws Exception { // Validate that a block was read. assertEquals(NUMBER_OF_BLOCKS * UPLOAD_BLOCK_SIZE, count); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIoWithSecureMode.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIoWithSecureMode.java new file mode 100644 index 00000000000..2b0ea56821c --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIoWithSecureMode.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure; + +/** + * Extends ITestAzureConcurrentOutOfBandIo in order to run testReadOOBWrites with secure mode + * (fs.azure.secure.mode) both enabled and disabled. 
+ */ +public class ITestAzureConcurrentOutOfBandIoWithSecureMode + extends ITestAzureConcurrentOutOfBandIo { + + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.createOutOfBandStore( + UPLOAD_BLOCK_SIZE, DOWNLOAD_BLOCK_SIZE, true); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureFileSystemErrorConditions.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureFileSystemErrorConditions.java similarity index 84% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureFileSystemErrorConditions.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureFileSystemErrorConditions.java index c98522417f4..49e67302947 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureFileSystemErrorConditions.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureFileSystemErrorConditions.java @@ -18,12 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.NO_ACCESS_TO_CONTAINER_MSG; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assume.assumeNotNull; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -31,34 +25,41 @@ import java.net.URI; import java.util.Arrays; import java.util.HashMap; +import java.util.concurrent.Callable; + +import com.microsoft.azure.storage.OperationContext; +import com.microsoft.azure.storage.SendingRequestEvent; +import com.microsoft.azure.storage.StorageEvent; +import org.junit.Test; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.TestHookOperationContext; import org.apache.hadoop.test.GenericTestUtils; -import org.junit.Test; +import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.NO_ACCESS_TO_CONTAINER_MSG; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.junit.Assume.assumeNotNull; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.SendingRequestEvent; -import com.microsoft.azure.storage.StorageEvent; - -public class TestAzureFileSystemErrorConditions { +/** + * Error handling. 
+ */ +public class ITestAzureFileSystemErrorConditions extends + AbstractWasbTestWithTimeout { private static final int ALL_THREE_FILE_SIZE = 1024; @Test public void testNoInitialize() throws Exception { - AzureNativeFileSystemStore store = new AzureNativeFileSystemStore(); - boolean passed = false; - try { - store.retrieveMetadata("foo"); - passed = true; - } catch (AssertionError e) { - } - assertFalse( - "Doing an operation on the store should throw if not initalized.", - passed); + intercept(AssertionError.class, + new Callable() { + @Override + public FileMetadata call() throws Exception { + return new AzureNativeFileSystemStore() + .retrieveMetadata("foo"); + } + }); } /** @@ -89,8 +90,7 @@ public void testAccessContainerWithWrongVersion() throws Exception { AzureNativeFileSystemStore store = new AzureNativeFileSystemStore(); MockStorageInterface mockStorage = new MockStorageInterface(); store.setAzureStorageInteractionLayer(mockStorage); - FileSystem fs = new NativeAzureFileSystem(store); - try { + try (FileSystem fs = new NativeAzureFileSystem(store)) { Configuration conf = new Configuration(); AzureBlobStorageTestAccount.setMockAccountKey(conf); HashMap metadata = new HashMap(); @@ -99,19 +99,17 @@ public void testAccessContainerWithWrongVersion() throws Exception { mockStorage.addPreExistingContainer( AzureBlobStorageTestAccount.getMockContainerUri(), metadata); - boolean passed = false; - try { - fs.initialize(new URI(AzureBlobStorageTestAccount.MOCK_WASB_URI), conf); - fs.listStatus(new Path("/")); - passed = true; - } catch (AzureException ex) { - assertTrue("Unexpected exception message: " + ex, - ex.getMessage().contains("unsupported version: 2090-04-05.")); - } - assertFalse("Should've thrown an exception because of the wrong version.", - passed); - } finally { - fs.close(); + AzureException ex = intercept(AzureException.class, + new Callable() { + @Override + public FileStatus[] call() throws Exception { + fs.initialize(new URI(AzureBlobStorageTestAccount.MOCK_WASB_URI), + conf); + return fs.listStatus(new Path("/")); + } + }); + GenericTestUtils.assertExceptionContains( + "unsupported version: 2090-04-05.", ex); } } @@ -120,7 +118,7 @@ private interface ConnectionRecognizer { } private class TransientErrorInjector extends StorageEvent { - final ConnectionRecognizer connectionRecognizer; + private final ConnectionRecognizer connectionRecognizer; private boolean injectedErrorOnce = false; public TransientErrorInjector(ConnectionRecognizer connectionRecognizer) { @@ -129,7 +127,8 @@ public TransientErrorInjector(ConnectionRecognizer connectionRecognizer) { @Override public void eventOccurred(SendingRequestEvent eventArg) { - HttpURLConnection connection = (HttpURLConnection)eventArg.getConnectionObject(); + HttpURLConnection connection + = (HttpURLConnection) eventArg.getConnectionObject(); if (!connectionRecognizer.isTargetConnection(connection)) { return; } @@ -178,10 +177,10 @@ public boolean isTargetConnection(HttpURLConnection connection) { private void writeAllThreeFile(NativeAzureFileSystem fs, Path testFile) throws IOException { byte[] buffer = new byte[ALL_THREE_FILE_SIZE]; - Arrays.fill(buffer, (byte)3); - OutputStream stream = fs.create(testFile); - stream.write(buffer); - stream.close(); + Arrays.fill(buffer, (byte) 3); + try(OutputStream stream = fs.create(testFile)) { + stream.write(buffer); + } } private void readAllThreeFile(NativeAzureFileSystem fs, Path testFile) diff --git 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobDataValidation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobDataValidation.java similarity index 89% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobDataValidation.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobDataValidation.java index ea17b62c193..0aa93935fb0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobDataValidation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobDataValidation.java @@ -20,10 +20,6 @@ import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_CHECK_BLOCK_MD5; import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_STORE_BLOB_MD5; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.fail; import static org.junit.Assume.assumeNotNull; import java.io.ByteArrayInputStream; @@ -33,9 +29,12 @@ import java.net.HttpURLConnection; import java.util.Arrays; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.TestHookOperationContext; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; + import org.junit.After; import org.junit.Test; @@ -54,15 +53,12 @@ * Test that we do proper data integrity validation with MD5 checks as * configured. */ -public class TestBlobDataValidation { +public class ITestBlobDataValidation extends AbstractWasbTestWithTimeout { private AzureBlobStorageTestAccount testAccount; @After public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - } + testAccount = AzureTestUtils.cleanupTestAccount(testAccount); } /** @@ -86,12 +82,23 @@ public void testStoreBlobMd5() throws Exception { testStoreBlobMd5(true); } + /** + * Trims a suffix/prefix from the given string. For example if + * s is given as "/xy" and toTrim is "/", this method returns "xy" + */ + private static String trim(String s, String toTrim) { + return StringUtils.removeEnd(StringUtils.removeStart(s, toTrim), + toTrim); + } + private void testStoreBlobMd5(boolean expectMd5Stored) throws Exception { assumeNotNull(testAccount); // Write a test file. - String testFileKey = "testFile"; - Path testFilePath = new Path("/" + testFileKey); - OutputStream outStream = testAccount.getFileSystem().create(testFilePath); + NativeAzureFileSystem fs = testAccount.getFileSystem(); + Path testFilePath = AzureTestUtils.pathForTests(fs, + methodName.getMethodName()); + String testFileKey = trim(testFilePath.toUri().getPath(), "/"); + OutputStream outStream = fs.create(testFilePath); outStream.write(new byte[] { 5, 15 }); outStream.close(); @@ -114,7 +121,7 @@ private void testStoreBlobMd5(boolean expectMd5Stored) throws Exception { // Now read back the content. If we stored the MD5 for the blob content // we should get a data corruption error. 
- InputStream inStream = testAccount.getFileSystem().open(testFilePath); + InputStream inStream = fs.open(testFilePath); try { byte[] inBuf = new byte[100]; while (inStream.read(inBuf) > 0){ diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobTypeSpeedDifference.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobTypeSpeedDifference.java similarity index 85% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobTypeSpeedDifference.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobTypeSpeedDifference.java index afb16efc676..b46ad5b4903 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobTypeSpeedDifference.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobTypeSpeedDifference.java @@ -18,23 +18,31 @@ package org.apache.hadoop.fs.azure; -import java.io.*; -import java.util.*; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; - -import junit.framework.*; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.Date; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; +import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; + /** * A simple benchmark to find out the difference in speed between block * and page blobs. */ -public class TestBlobTypeSpeedDifference extends TestCase { +public class ITestBlobTypeSpeedDifference extends AbstractWasbTestBase { + + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create(); + } + /** * Writes data to the given stream of the given size, flushing every * x bytes. @@ -101,8 +109,10 @@ private static TestResult writeBlockBlobTestFile(NativeAzureFileSystem fs, */ private static TestResult writePageBlobTestFile(NativeAzureFileSystem fs, long size, long flushInterval) throws IOException { + Path testFile = AzureTestUtils.blobPathForTests(fs, + "writePageBlobTestFile"); return writeTestFile(fs, - AzureBlobStorageTestAccount.pageBlobPath("pageBlob"), + testFile, size, flushInterval); } @@ -111,16 +121,7 @@ private static TestResult writePageBlobTestFile(NativeAzureFileSystem fs, */ @Test public void testTenKbFileFrequentFlush() throws Exception { - AzureBlobStorageTestAccount testAccount = - AzureBlobStorageTestAccount.create(); - if (testAccount == null) { - return; - } - try { - testForSizeAndFlushInterval(testAccount.getFileSystem(), 10 * 1000, 500); - } finally { - testAccount.cleanup(); - } + testForSizeAndFlushInterval(getFileSystem(), 10 * 1000, 500); } /** @@ -144,7 +145,7 @@ private static void testForSizeAndFlushInterval(NativeAzureFileSystem fs, * Runs the benchmark for the given file size and flush frequency from the * command line. 
*/ - public static void main(String argv[]) throws Exception { + public static void main(String[] argv) throws Exception { Configuration conf = new Configuration(); long size = 10 * 1000 * 1000; long flushInterval = 2000; @@ -154,7 +155,9 @@ public static void main(String argv[]) throws Exception { if (argv.length > 1) { flushInterval = Long.parseLong(argv[1]); } - testForSizeAndFlushInterval((NativeAzureFileSystem)FileSystem.get(conf), - size, flushInterval); + testForSizeAndFlushInterval( + (NativeAzureFileSystem) FileSystem.get(conf), + size, + flushInterval); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlockBlobInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java similarity index 97% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlockBlobInputStream.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java index 0ae4012847b..07a13df11f3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlockBlobInputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java @@ -26,9 +26,7 @@ import java.util.concurrent.Callable; import org.junit.FixMethodOrder; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.Timeout; import org.junit.runners.MethodSorters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,13 +38,11 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeNotNull; import static org.apache.hadoop.test.LambdaTestUtils.*; @@ -58,9 +54,9 @@ */ @FixMethodOrder(MethodSorters.NAME_ASCENDING) -public class TestBlockBlobInputStream extends AbstractWasbTestBase { +public class ITestBlockBlobInputStream extends AbstractAzureScaleTest { private static final Logger LOG = LoggerFactory.getLogger( - TestBlockBlobInputStream.class); + ITestBlockBlobInputStream.class); private static final int KILOBYTE = 1024; private static final int MEGABYTE = KILOBYTE * KILOBYTE; private static final int TEST_FILE_SIZE = 6 * MEGABYTE; @@ -71,11 +67,8 @@ public class TestBlockBlobInputStream extends AbstractWasbTestBase { private AzureBlobStorageTestAccount accountUsingInputStreamV2; private long testFileLength; - /** - * Long test timeout. 
- */ - @Rule - public Timeout testTimeout = new Timeout(10 * 60 * 1000); + + private FileStatus testFileStatus; private Path hugefile; @@ -867,9 +860,15 @@ private long randomRead(int version, FileSystem fs) throws IOException { @Test public void test_999_DeleteHugeFiles() throws IOException { - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - fs.delete(TEST_FILE_PATH, false); - timer.end("time to delete %s", TEST_FILE_PATH); + try { + NanoTimer timer = new NanoTimer(); + NativeAzureFileSystem fs = getFileSystem(); + fs.delete(TEST_FILE_PATH, false); + timer.end("time to delete %s", TEST_FILE_PATH); + } finally { + // clean up the test account + AzureTestUtils.cleanupTestAccount(accountUsingInputStreamV1); + } } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestContainerChecks.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestContainerChecks.java similarity index 82% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestContainerChecks.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestContainerChecks.java index f6ab94d2b7e..cc3baf501d7 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestContainerChecks.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestContainerChecks.java @@ -18,17 +18,19 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeNotNull; import java.io.FileNotFoundException; import java.util.EnumSet; +import java.util.concurrent.Callable; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount.CreateOptions; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; +import org.apache.hadoop.test.LambdaTestUtils; + import org.junit.After; import org.junit.Assume; import org.junit.Before; @@ -41,15 +43,13 @@ /** * Tests that WASB creates containers only if needed. 
*/ -public class TestContainerChecks { +public class ITestContainerChecks extends AbstractWasbTestWithTimeout { private AzureBlobStorageTestAccount testAccount; private boolean runningInSASMode = false; + @After public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - } + testAccount = AzureTestUtils.cleanup(testAccount); } @Before @@ -60,8 +60,7 @@ public void setMode() { @Test public void testContainerExistAfterDoesNotExist() throws Exception { - testAccount = AzureBlobStorageTestAccount.create("", - EnumSet.noneOf(CreateOptions.class)); + testAccount = blobStorageTestAccount(); assumeNotNull(testAccount); CloudBlobContainer container = testAccount.getRealContainer(); FileSystem fs = testAccount.getFileSystem(); @@ -93,10 +92,15 @@ public void testContainerExistAfterDoesNotExist() throws Exception { assertTrue(container.exists()); } + protected AzureBlobStorageTestAccount blobStorageTestAccount() + throws Exception { + return AzureBlobStorageTestAccount.create("", + EnumSet.noneOf(CreateOptions.class)); + } + @Test public void testContainerCreateAfterDoesNotExist() throws Exception { - testAccount = AzureBlobStorageTestAccount.create("", - EnumSet.noneOf(CreateOptions.class)); + testAccount = blobStorageTestAccount(); assumeNotNull(testAccount); CloudBlobContainer container = testAccount.getRealContainer(); FileSystem fs = testAccount.getFileSystem(); @@ -125,8 +129,7 @@ public void testContainerCreateAfterDoesNotExist() throws Exception { @Test public void testContainerCreateOnWrite() throws Exception { - testAccount = AzureBlobStorageTestAccount.create("", - EnumSet.noneOf(CreateOptions.class)); + testAccount = blobStorageTestAccount(); assumeNotNull(testAccount); CloudBlobContainer container = testAccount.getRealContainer(); FileSystem fs = testAccount.getFileSystem(); @@ -145,19 +148,25 @@ public void testContainerCreateOnWrite() throws Exception { assertFalse(container.exists()); // Neither should a read. - try { - fs.open(new Path("/foo")); - assertFalse("Should've thrown.", true); - } catch (FileNotFoundException ex) { - } + Path foo = new Path("/testContainerCreateOnWrite-foo"); + Path bar = new Path("/testContainerCreateOnWrite-bar"); + LambdaTestUtils.intercept(FileNotFoundException.class, + new Callable() { + @Override + public String call() throws Exception { + fs.open(foo).close(); + return "Stream to " + foo; + } + } + ); assertFalse(container.exists()); // Neither should a rename - assertFalse(fs.rename(new Path("/foo"), new Path("/bar"))); + assertFalse(fs.rename(foo, bar)); assertFalse(container.exists()); // But a write should. 
- assertTrue(fs.createNewFile(new Path("/foo"))); + assertTrue(fs.createNewFile(foo)); assertTrue(container.exists()); } @@ -176,7 +185,7 @@ public void testContainerChecksWithSas() throws Exception { // A write should just fail try { - fs.createNewFile(new Path("/foo")); + fs.createNewFile(new Path("/testContainerChecksWithSas-foo")); assertFalse("Should've thrown.", true); } catch (AzureException ex) { } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationExceptionHandling.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionHandling.java similarity index 62% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationExceptionHandling.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionHandling.java index 9ac25dd055c..a45dae48918 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationExceptionHandling.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionHandling.java @@ -24,25 +24,35 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.junit.After; -import org.junit.Assert; import org.junit.Test; +import static org.apache.hadoop.fs.azure.ExceptionHandlingTestHelper.*; -public class TestFileSystemOperationExceptionHandling +/** + * Single threaded exception handling. + */ +public class ITestFileSystemOperationExceptionHandling extends AbstractWasbTestBase { private FSDataInputStream inputStream = null; - private static Path testPath = new Path("testfile.dat"); + private Path testPath; + private Path testFolderPath; - private static Path testFolderPath = new Path("testfolder"); + @Override + public void setUp() throws Exception { + super.setUp(); + testPath = path("testfile.dat"); + testFolderPath = path("testfolder"); + } - /* + /** * Helper method that creates a InputStream to validate exceptions - * for various scenarios + * for various scenarios. */ private void setupInputStreamToTest(AzureBlobStorageTestAccount testAccount) throws Exception { @@ -50,8 +60,9 @@ private void setupInputStreamToTest(AzureBlobStorageTestAccount testAccount) FileSystem fs = testAccount.getFileSystem(); // Step 1: Create a file and write dummy data. - Path testFilePath1 = new Path("test1.dat"); - Path testFilePath2 = new Path("test2.dat"); + Path base = methodPath(); + Path testFilePath1 = new Path(base, "test1.dat"); + Path testFilePath2 = new Path(base, "test2.dat"); FSDataOutputStream outputStream = fs.create(testFilePath1); String testString = "This is a test string"; outputStream.write(testString.getBytes()); @@ -64,28 +75,28 @@ private void setupInputStreamToTest(AzureBlobStorageTestAccount testAccount) fs.rename(testFilePath1, testFilePath2); } - /* + /** * Tests a basic single threaded read scenario for Page blobs. 
*/ @Test(expected=FileNotFoundException.class) public void testSingleThreadedPageBlobReadScenario() throws Throwable { - AzureBlobStorageTestAccount testAccount = ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(); + AzureBlobStorageTestAccount testAccount = getPageBlobTestStorageAccount(); setupInputStreamToTest(testAccount); byte[] readBuffer = new byte[512]; inputStream.read(readBuffer); } - /* + /** * Tests a basic single threaded seek scenario for Page blobs. */ @Test(expected=FileNotFoundException.class) public void testSingleThreadedPageBlobSeekScenario() throws Throwable { - AzureBlobStorageTestAccount testAccount = ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(); + AzureBlobStorageTestAccount testAccount = getPageBlobTestStorageAccount(); setupInputStreamToTest(testAccount); inputStream.seek(5); } - /* + /** * Test a basic single thread seek scenario for Block blobs. */ @Test(expected=FileNotFoundException.class) @@ -97,7 +108,7 @@ public void testSingleThreadBlockBlobSeekScenario() throws Throwable { inputStream.read(); } - /* + /** * Tests a basic single threaded read scenario for Block blobs. */ @Test(expected=FileNotFoundException.class) @@ -108,144 +119,147 @@ public void testSingledThreadBlockBlobReadScenario() throws Throwable{ inputStream.read(readBuffer); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic single threaded setPermission scenario + /** + * Tests basic single threaded setPermission scenario. */ + @Test(expected = FileNotFoundException.class) public void testSingleThreadedBlockBlobSetPermissionScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(createTestAccount(), testPath); + createEmptyFile(createTestAccount(), testPath); fs.delete(testPath, true); - fs.setPermission(testPath, new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); + fs.setPermission(testPath, + new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic single threaded setPermission scenario + /** + * Tests basic single threaded setPermission scenario. */ - public void testSingleThreadedPageBlobSetPermissionScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), - testPath); + @Test(expected = FileNotFoundException.class) + public void testSingleThreadedPageBlobSetPermissionScenario() + throws Throwable { + createEmptyFile(getPageBlobTestStorageAccount(), testPath); fs.delete(testPath, true); fs.setOwner(testPath, "testowner", "testgroup"); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic single threaded setPermission scenario + /** + * Tests basic single threaded setPermission scenario. */ + @Test(expected = FileNotFoundException.class) public void testSingleThreadedBlockBlobSetOwnerScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(createTestAccount(), testPath); + createEmptyFile(createTestAccount(), testPath); fs.delete(testPath, true); fs.setOwner(testPath, "testowner", "testgroup"); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic single threaded setPermission scenario + /** + * Tests basic single threaded setPermission scenario. 
*/ + @Test(expected = FileNotFoundException.class) public void testSingleThreadedPageBlobSetOwnerScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + createEmptyFile(getPageBlobTestStorageAccount(), testPath); fs.delete(testPath, true); - fs.setPermission(testPath, new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); + fs.setPermission(testPath, + new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); } - @Test(expected=FileNotFoundException.class) - /* - * Test basic single threaded listStatus scenario + /** + * Test basic single threaded listStatus scenario. */ + @Test(expected = FileNotFoundException.class) public void testSingleThreadedBlockBlobListStatusScenario() throws Throwable { - ExceptionHandlingTestHelper.createTestFolder(createTestAccount(), testFolderPath); - fs.delete(testFolderPath, true); - fs.listStatus(testFolderPath); - } - - @Test(expected=FileNotFoundException.class) - /* - * Test basica single threaded listStatus scenario - */ - public void testSingleThreadedPageBlobListStatusScenario() throws Throwable { - ExceptionHandlingTestHelper.createTestFolder(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + createTestFolder(createTestAccount(), testFolderPath); fs.delete(testFolderPath, true); fs.listStatus(testFolderPath); } - @Test - /* - * Test basic single threaded listStatus scenario + /** + * Test basic single threaded listStatus scenario. */ + @Test(expected = FileNotFoundException.class) + public void testSingleThreadedPageBlobListStatusScenario() throws Throwable { + createTestFolder(getPageBlobTestStorageAccount(), + testFolderPath); + fs.delete(testFolderPath, true); + fs.listStatus(testFolderPath); + } + + /** + * Test basic single threaded listStatus scenario. + */ + @Test public void testSingleThreadedBlockBlobRenameScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(createTestAccount(), + createEmptyFile(createTestAccount(), testPath); Path dstPath = new Path("dstFile.dat"); fs.delete(testPath, true); boolean renameResult = fs.rename(testPath, dstPath); - Assert.assertFalse(renameResult); + assertFalse(renameResult); } - @Test - /* - * Test basic single threaded listStatus scenario + /** + * Test basic single threaded listStatus scenario. */ + @Test public void testSingleThreadedPageBlobRenameScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + createEmptyFile(getPageBlobTestStorageAccount(), testPath); Path dstPath = new Path("dstFile.dat"); fs.delete(testPath, true); boolean renameResult = fs.rename(testPath, dstPath); - Assert.assertFalse(renameResult); + assertFalse(renameResult); } - @Test - /* - * Test basic single threaded listStatus scenario + /** + * Test basic single threaded listStatus scenario. */ + @Test public void testSingleThreadedBlockBlobDeleteScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(createTestAccount(), + createEmptyFile(createTestAccount(), testPath); fs.delete(testPath, true); boolean deleteResult = fs.delete(testPath, true); - Assert.assertFalse(deleteResult); + assertFalse(deleteResult); } - @Test - /* - * Test basic single threaded listStatus scenario + /** + * Test basic single threaded listStatus scenario. 
*/ + @Test public void testSingleThreadedPageBlobDeleteScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + createEmptyFile(getPageBlobTestStorageAccount(), testPath); fs.delete(testPath, true); boolean deleteResult = fs.delete(testPath, true); - Assert.assertFalse(deleteResult); + assertFalse(deleteResult); } - @Test(expected=FileNotFoundException.class) - /* - * Test basic single threaded listStatus scenario + /** + * Test delete then open a file. */ + @Test(expected = FileNotFoundException.class) public void testSingleThreadedBlockBlobOpenScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(createTestAccount(), + createEmptyFile(createTestAccount(), testPath); fs.delete(testPath, true); inputStream = fs.open(testPath); } - @Test(expected=FileNotFoundException.class) - /* - * Test basic single threaded listStatus scenario + /** + * Test delete then open a file. */ + @Test(expected = FileNotFoundException.class) public void testSingleThreadedPageBlobOpenScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + createEmptyFile(getPageBlobTestStorageAccount(), testPath); fs.delete(testPath, true); inputStream = fs.open(testPath); @@ -257,13 +271,13 @@ public void tearDown() throws Exception { inputStream.close(); } - if (fs != null && fs.exists(testPath)) { - fs.delete(testPath, true); - } + ContractTestUtils.rm(fs, testPath, true, true); + super.tearDown(); } @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + protected AzureBlobStorageTestAccount createTestAccount() + throws Exception { return AzureBlobStorageTestAccount.create(); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationExceptionMessage.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionMessage.java similarity index 65% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationExceptionMessage.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionMessage.java index e619817b896..6d5e72e57b7 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationExceptionMessage.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionMessage.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,63 +17,63 @@ */ package org.apache.hadoop.fs.azure; + import java.net.URI; import java.util.UUID; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; import org.apache.hadoop.test.GenericTestUtils; -import org.junit.Assert; +import com.microsoft.azure.storage.CloudStorageAccount; import org.junit.Test; import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.NO_ACCESS_TO_CONTAINER_MSG; +/** + * Test for error messages coming from the SDK. 
+ */ +public class ITestFileSystemOperationExceptionMessage + extends AbstractWasbTestWithTimeout { + -public class TestFileSystemOperationExceptionMessage extends - NativeAzureFileSystemBaseTest { @Test - public void testAnonymouseCredentialExceptionMessage() throws Throwable{ + public void testAnonymouseCredentialExceptionMessage() throws Throwable { Configuration conf = AzureBlobStorageTestAccount.createTestConfiguration(); + CloudStorageAccount account = + AzureBlobStorageTestAccount.createTestAccount(conf); + AzureTestUtils.assume("No test account", account != null); + String testStorageAccount = conf.get("fs.azure.test.account.name"); conf = new Configuration(); - conf.set("fs.AbstractFileSystem.wasb.impl", "org.apache.hadoop.fs.azure.Wasb"); + conf.set("fs.AbstractFileSystem.wasb.impl", + "org.apache.hadoop.fs.azure.Wasb"); conf.set("fs.azure.skip.metrics", "true"); String testContainer = UUID.randomUUID().toString(); String wasbUri = String.format("wasb://%s@%s", testContainer, testStorageAccount); - fs = new NativeAzureFileSystem(); - try { - fs.initialize(new URI(wasbUri), conf); + try(NativeAzureFileSystem filesystem = new NativeAzureFileSystem()) { + filesystem.initialize(new URI(wasbUri), conf); + fail("Expected an exception, got " + filesystem); } catch (Exception ex) { Throwable innerException = ex.getCause(); while (innerException != null - && !(innerException instanceof AzureException)) { + && !(innerException instanceof AzureException)) { innerException = innerException.getCause(); } if (innerException != null) { - String exceptionMessage = innerException.getMessage(); - if (exceptionMessage == null - || exceptionMessage.length() == 0) { - Assert.fail();} - else { - GenericTestUtils.assertExceptionContains(String.format( - NO_ACCESS_TO_CONTAINER_MSG, testStorageAccount, testContainer), - ex); - } + GenericTestUtils.assertExceptionContains(String.format( + NO_ACCESS_TO_CONTAINER_MSG, testStorageAccount, testContainer), + ex); } else { - Assert.fail(); + fail("No inner azure exception"); } } } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationsExceptionHandlingMultiThreaded.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java similarity index 51% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationsExceptionHandlingMultiThreaded.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java index 1cd18ee554e..175a9ec948a 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationsExceptionHandlingMultiThreaded.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java @@ -20,28 +20,56 @@ import java.io.FileNotFoundException; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.junit.After; import org.junit.Test; -public class TestFileSystemOperationsExceptionHandlingMultiThreaded 
+import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.IOUtils; + +import static org.apache.hadoop.fs.azure.ExceptionHandlingTestHelper.*; + +/** + * Multithreaded operations on FS, verify failures are as expected. + */ +public class ITestFileSystemOperationsExceptionHandlingMultiThreaded extends AbstractWasbTestBase { FSDataInputStream inputStream = null; - private static Path testPath = new Path("testfile.dat"); - private static Path testFolderPath = new Path("testfolder"); + private Path testPath; + private Path testFolderPath; + @Override + public void setUp() throws Exception { + super.setUp(); + testPath = path("testfile.dat"); + testFolderPath = path("testfolder"); + } - /* + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create(); + } + + @Override + public void tearDown() throws Exception { + + IOUtils.closeStream(inputStream); + ContractTestUtils.rm(fs, testPath, true, false); + ContractTestUtils.rm(fs, testFolderPath, true, false); + super.tearDown(); + } + + /** * Helper method to creates an input stream to test various scenarios. */ - private void getInputStreamToTest(FileSystem fs, Path testPath) throws Throwable { + private void getInputStreamToTest(FileSystem fs, Path testPath) + throws Throwable { FSDataOutputStream outputStream = fs.create(testPath); String testString = "This is a test string"; @@ -51,19 +79,21 @@ private void getInputStreamToTest(FileSystem fs, Path testPath) throws Throwable inputStream = fs.open(testPath); } - /* + /** * Test to validate correct exception is thrown for Multithreaded read - * scenario for block blobs + * scenario for block blobs. */ - @Test(expected=FileNotFoundException.class) + @Test(expected = FileNotFoundException.class) public void testMultiThreadedBlockBlobReadScenario() throws Throwable { AzureBlobStorageTestAccount testAccount = createTestAccount(); - fs = testAccount.getFileSystem(); - Path testFilePath1 = new Path("test1.dat"); - + NativeAzureFileSystem fs = testAccount.getFileSystem(); + Path base = methodPath(); + Path testFilePath1 = new Path(base, "test1.dat"); + Path renamePath = new Path(base, "test2.dat"); getInputStreamToTest(fs, testFilePath1); - Thread renameThread = new Thread(new RenameThread(fs, testFilePath1)); + Thread renameThread = new Thread( + new RenameThread(fs, testFilePath1, renamePath)); renameThread.start(); renameThread.join(); @@ -72,20 +102,24 @@ public void testMultiThreadedBlockBlobReadScenario() throws Throwable { inputStream.read(readBuffer); } - /* + /** * Test to validate correct exception is thrown for Multithreaded seek - * scenario for block blobs + * scenario for block blobs. 
*/ - - @Test(expected=FileNotFoundException.class) + @Test(expected = FileNotFoundException.class) public void testMultiThreadBlockBlobSeekScenario() throws Throwable { +/* AzureBlobStorageTestAccount testAccount = createTestAccount(); fs = testAccount.getFileSystem(); - Path testFilePath1 = new Path("test1.dat"); +*/ + Path base = methodPath(); + Path testFilePath1 = new Path(base, "test1.dat"); + Path renamePath = new Path(base, "test2.dat"); getInputStreamToTest(fs, testFilePath1); - Thread renameThread = new Thread(new RenameThread(fs, testFilePath1)); + Thread renameThread = new Thread( + new RenameThread(fs, testFilePath1, renamePath)); renameThread.start(); renameThread.join(); @@ -94,43 +128,50 @@ public void testMultiThreadBlockBlobSeekScenario() throws Throwable { inputStream.read(); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic multi threaded setPermission scenario + /** + * Tests basic multi threaded setPermission scenario. */ - public void testMultiThreadedPageBlobSetPermissionScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + @Test(expected = FileNotFoundException.class) + public void testMultiThreadedPageBlobSetPermissionScenario() + throws Throwable { + createEmptyFile( + getPageBlobTestStorageAccount(), testPath); Thread t = new Thread(new DeleteThread(fs, testPath)); t.start(); while (t.isAlive()) { - fs.setPermission(testPath, new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); + fs.setPermission(testPath, + new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); } - fs.setPermission(testPath, new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); + fs.setPermission(testPath, + new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic multi threaded setPermission scenario + /** + * Tests basic multi threaded setPermission scenario. */ - public void testMultiThreadedBlockBlobSetPermissionScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(createTestAccount(), + @Test(expected = FileNotFoundException.class) + public void testMultiThreadedBlockBlobSetPermissionScenario() + throws Throwable { + createEmptyFile(createTestAccount(), testPath); Thread t = new Thread(new DeleteThread(fs, testPath)); t.start(); while (t.isAlive()) { - fs.setPermission(testPath, new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); + fs.setPermission(testPath, + new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); } - fs.setPermission(testPath, new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); + fs.setPermission(testPath, + new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic multi threaded setPermission scenario + /** + * Tests basic multi threaded setPermission scenario. 
*/ + @Test(expected = FileNotFoundException.class) public void testMultiThreadedPageBlobOpenScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(createTestAccount(), + createEmptyFile(createTestAccount(), testPath); Thread t = new Thread(new DeleteThread(fs, testPath)); t.start(); @@ -143,13 +184,14 @@ public void testMultiThreadedPageBlobOpenScenario() throws Throwable { inputStream.close(); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic multi threaded setPermission scenario + /** + * Tests basic multi threaded setPermission scenario. */ + @Test(expected = FileNotFoundException.class) public void testMultiThreadedBlockBlobOpenScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + createEmptyFile( + getPageBlobTestStorageAccount(), testPath); Thread t = new Thread(new DeleteThread(fs, testPath)); t.start(); @@ -162,13 +204,13 @@ public void testMultiThreadedBlockBlobOpenScenario() throws Throwable { inputStream.close(); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic multi threaded setOwner scenario + /** + * Tests basic multi threaded setOwner scenario. */ + @Test(expected = FileNotFoundException.class) public void testMultiThreadedBlockBlobSetOwnerScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(createTestAccount(), testPath); + createEmptyFile(createTestAccount(), testPath); Thread t = new Thread(new DeleteThread(fs, testPath)); t.start(); while (t.isAlive()) { @@ -177,12 +219,13 @@ public void testMultiThreadedBlockBlobSetOwnerScenario() throws Throwable { fs.setOwner(testPath, "testowner", "testgroup"); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic multi threaded setOwner scenario + /** + * Tests basic multi threaded setOwner scenario. */ + @Test(expected = FileNotFoundException.class) public void testMultiThreadedPageBlobSetOwnerScenario() throws Throwable { - ExceptionHandlingTestHelper.createEmptyFile(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + createEmptyFile( + getPageBlobTestStorageAccount(), testPath); Thread t = new Thread(new DeleteThread(fs, testPath)); t.start(); @@ -192,28 +235,13 @@ public void testMultiThreadedPageBlobSetOwnerScenario() throws Throwable { fs.setOwner(testPath, "testowner", "testgroup"); } - @Test(expected=FileNotFoundException.class) - /* - * Tests basic multi threaded listStatus scenario + /** + * Tests basic multi threaded listStatus scenario. 
*/ + @Test(expected = FileNotFoundException.class) public void testMultiThreadedBlockBlobListStatusScenario() throws Throwable { - ExceptionHandlingTestHelper.createTestFolder(createTestAccount(), testFolderPath); - Thread t = new Thread(new DeleteThread(fs, testFolderPath)); - t.start(); - while (t.isAlive()) { - fs.listStatus(testFolderPath); - } - fs.listStatus(testFolderPath); - } - - @Test(expected=FileNotFoundException.class) - /* - * Tests basic multi threaded listStatus scenario - */ - public void testMultiThreadedPageBlobListStatusScenario() throws Throwable { - - ExceptionHandlingTestHelper.createTestFolder(ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(), + createTestFolder(createTestAccount(), testFolderPath); Thread t = new Thread(new DeleteThread(fs, testFolderPath)); t.start(); @@ -223,20 +251,38 @@ public void testMultiThreadedPageBlobListStatusScenario() throws Throwable { fs.listStatus(testFolderPath); } - /* - * Test to validate correct exception is thrown for Multithreaded read - * scenario for page blobs + /** + * Tests basic multi threaded listStatus scenario. */ + @Test(expected = FileNotFoundException.class) + public void testMultiThreadedPageBlobListStatusScenario() throws Throwable { - @Test(expected=FileNotFoundException.class) + createTestFolder( + getPageBlobTestStorageAccount(), + testFolderPath); + Thread t = new Thread(new DeleteThread(fs, testFolderPath)); + t.start(); + while (t.isAlive()) { + fs.listStatus(testFolderPath); + } + fs.listStatus(testFolderPath); + } + + /** + * Test to validate correct exception is thrown for Multithreaded read + * scenario for page blobs. + */ + @Test(expected = FileNotFoundException.class) public void testMultiThreadedPageBlobReadScenario() throws Throwable { - AzureBlobStorageTestAccount testAccount = ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(); - fs = testAccount.getFileSystem(); - Path testFilePath1 = new Path("test1.dat"); + bindToTestAccount(getPageBlobTestStorageAccount()); + Path base = methodPath(); + Path testFilePath1 = new Path(base, "test1.dat"); + Path renamePath = new Path(base, "test2.dat"); getInputStreamToTest(fs, testFilePath1); - Thread renameThread = new Thread(new RenameThread(fs, testFilePath1)); + Thread renameThread = new Thread( + new RenameThread(fs, testFilePath1, renamePath)); renameThread.start(); renameThread.join(); @@ -244,87 +290,77 @@ public void testMultiThreadedPageBlobReadScenario() throws Throwable { inputStream.read(readBuffer); } - /* + /** * Test to validate correct exception is thrown for Multithreaded seek - * scenario for page blobs + * scenario for page blobs. 
*/ - @Test(expected=FileNotFoundException.class) + @Test(expected = FileNotFoundException.class) public void testMultiThreadedPageBlobSeekScenario() throws Throwable { - AzureBlobStorageTestAccount testAccount = ExceptionHandlingTestHelper.getPageBlobTestStorageAccount(); - fs = testAccount.getFileSystem(); - Path testFilePath1 = new Path("test1.dat"); + bindToTestAccount(getPageBlobTestStorageAccount()); + + Path base = methodPath(); + Path testFilePath1 = new Path(base, "test1.dat"); + Path renamePath = new Path(base, "test2.dat"); getInputStreamToTest(fs, testFilePath1); - Thread renameThread = new Thread(new RenameThread(fs, testFilePath1)); + Thread renameThread = new Thread( + new RenameThread(fs, testFilePath1, renamePath)); renameThread.start(); renameThread.join(); inputStream.seek(5); } - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - @After - public void tearDown() throws Exception { + /** + * Helper thread that just renames the test file. + */ + private static class RenameThread implements Runnable { - if (inputStream != null) { - inputStream.close(); + private final FileSystem fs; + private final Path testPath; + private final Path renamePath; + + RenameThread(FileSystem fs, + Path testPath, + Path renamePath) { + this.fs = fs; + this.testPath = testPath; + this.renamePath = renamePath; } - if (fs != null && fs.exists(testPath)) { - fs.delete(testPath, true); - } - } -} - -/* - * Helper thread that just renames the test file. - */ -class RenameThread implements Runnable { - - private FileSystem fs; - private Path testPath; - private Path renamePath = new Path("test2.dat"); - - public RenameThread(FileSystem fs, Path testPath) { - this.fs = fs; - this.testPath = testPath; - } - - @Override - public void run(){ - try { - fs.rename(testPath, renamePath); - }catch (Exception e) { - // Swallowing the exception as the - // correctness of the test is controlled - // by the other thread - } - } -} - -class DeleteThread implements Runnable { - private FileSystem fs; - private Path testPath; - - public DeleteThread(FileSystem fs, Path testPath) { - this.fs = fs; - this.testPath = testPath; - } - - @Override - public void run() { - try { - fs.delete(testPath, true); - } catch (Exception e) { - // Swallowing the exception as the - // correctness of the test is controlled - // by the other thread + @Override + public void run() { + try { + fs.rename(testPath, renamePath); + } catch (Exception e) { + // Swallowing the exception as the + // correctness of the test is controlled + // by the other thread + } + } + } + + private static class DeleteThread implements Runnable { + private final FileSystem fs; + private final Path testPath; + + DeleteThread(FileSystem fs, Path testPath) { + this.fs = fs; + this.testPath = testPath; + } + + @Override + public void run() { + try { + fs.delete(testPath, true); + } catch (Exception e) { + // Swallowing the exception as the + // correctness of the test is controlled + // by the other thread + } } } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationsWithThreads.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsWithThreads.java similarity index 99% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationsWithThreads.java rename to 
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsWithThreads.java index fd3690c4a6c..4389fda393c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestFileSystemOperationsWithThreads.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsWithThreads.java @@ -45,7 +45,7 @@ /** * Tests the Native Azure file system (WASB) using parallel threads for rename and delete operations. */ -public class TestFileSystemOperationsWithThreads extends AbstractWasbTestBase { +public class ITestFileSystemOperationsWithThreads extends AbstractWasbTestBase { private final int renameThreads = 10; private final int deleteThreads = 20; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSAuthWithBlobSpecificKeys.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthWithBlobSpecificKeys.java similarity index 74% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSAuthWithBlobSpecificKeys.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthWithBlobSpecificKeys.java index 6149154a271..d7e48317bd2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSAuthWithBlobSpecificKeys.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthWithBlobSpecificKeys.java @@ -26,19 +26,15 @@ * Test class to hold all WASB authorization tests that use blob-specific keys * to access storage. */ -public class TestNativeAzureFSAuthWithBlobSpecificKeys - extends TestNativeAzureFileSystemAuthorizationWithOwner { +public class ITestNativeAzureFSAuthWithBlobSpecificKeys + extends ITestNativeAzureFileSystemAuthorizationWithOwner { + @Override - public Configuration getConfiguration() { - Configuration conf = super.getConfiguration(); + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); conf.set(KEY_USE_CONTAINER_SASKEY_FOR_ALL_ACCESS, "false"); return conf; } - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - Configuration conf = getConfiguration(); - return AzureBlobStorageTestAccount.create(conf); - } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSAuthorizationCaching.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthorizationCaching.java similarity index 73% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSAuthorizationCaching.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthorizationCaching.java index 84558f8660f..c73b1cc03d4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSAuthorizationCaching.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthorizationCaching.java @@ -19,7 +19,6 @@ package org.apache.hadoop.fs.azure; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.Test; import static org.apache.hadoop.fs.azure.CachingAuthorizer.KEY_AUTH_SERVICE_CACHING_ENABLE; @@ -27,34 +26,28 @@ /** * Test class to hold all WASB authorization caching related tests. 
*/ -public class TestNativeAzureFSAuthorizationCaching - extends TestNativeAzureFileSystemAuthorizationWithOwner { +public class ITestNativeAzureFSAuthorizationCaching + extends ITestNativeAzureFileSystemAuthorizationWithOwner { private static final int DUMMY_TTL_VALUE = 5000; @Override - public Configuration getConfiguration() { - Configuration conf = super.getConfiguration(); + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); conf.set(KEY_AUTH_SERVICE_CACHING_ENABLE, "true"); return conf; } - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - Configuration conf = getConfiguration(); - return AzureBlobStorageTestAccount.create(conf); - } - /** * Test to verify cache behavior -- assert that PUT overwrites value if present */ @Test public void testCachePut() throws Throwable { CachingAuthorizer cache = new CachingAuthorizer<>(DUMMY_TTL_VALUE, "TEST"); - cache.init(getConfiguration()); + cache.init(createConfiguration()); cache.put("TEST", 1); cache.put("TEST", 3); int result = cache.get("TEST"); - ContractTestUtils.assertTrue("Cache returned unexpected result", result == 3); + assertEquals("Cache returned unexpected result", 3, result); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSPageBlobLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSPageBlobLive.java similarity index 97% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSPageBlobLive.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSPageBlobLive.java index 208cff3952f..a4d8729a680 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFSPageBlobLive.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSPageBlobLive.java @@ -24,7 +24,7 @@ * operations on page blob files and folders work as expected. * These operations include create, delete, rename, list, and so on. */ -public class TestNativeAzureFSPageBlobLive extends +public class ITestNativeAzureFSPageBlobLive extends NativeAzureFileSystemBaseTest { @Override diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAppend.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAppend.java similarity index 82% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAppend.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAppend.java index a2b35cb5b67..29611bf24d8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAppend.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAppend.java @@ -28,26 +28,34 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.test.GenericTestUtils; -import org.junit.Assert; -import org.junit.Before; + import org.junit.Test; -public class TestNativeAzureFileSystemAppend extends AbstractWasbTestBase { +/** + * Test append operations. 
+ */ +public class ITestNativeAzureFileSystemAppend extends AbstractWasbTestBase { - private static final String TEST_FILE = "test.dat"; - private static final Path TEST_PATH = new Path(TEST_FILE); + private Path testPath; - private AzureBlobStorageTestAccount testAccount = null; + @Override + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + conf.setBoolean(NativeAzureFileSystem.APPEND_SUPPORT_ENABLE_PROPERTY_NAME, + true); + return conf; + } - @Before + @Override public void setUp() throws Exception { super.setUp(); - testAccount = createTestAccount(); - fs = testAccount.getFileSystem(); - Configuration conf = fs.getConf(); - conf.setBoolean(NativeAzureFileSystem.APPEND_SUPPORT_ENABLE_PROPERTY_NAME, true); - URI uri = fs.getUri(); - fs.initialize(uri, conf); + testPath = methodPath(); + } + + + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create(createConfiguration()); } /* @@ -63,9 +71,7 @@ private static byte[] getTestData(int size) { // Helper method to create file and write fileSize bytes of data on it. private byte[] createBaseFileWithData(int fileSize, Path testPath) throws Throwable { - FSDataOutputStream createStream = null; - try { - createStream = fs.create(testPath); + try(FSDataOutputStream createStream = fs.create(testPath)) { byte[] fileData = null; if (fileSize != 0) { @@ -73,10 +79,6 @@ private byte[] createBaseFileWithData(int fileSize, Path testPath) throws Throwa createStream.write(fileData); } return fileData; - } finally { - if (createStream != null) { - createStream.close(); - } } } @@ -116,10 +118,8 @@ private boolean verifyFileData(int dataLength, byte[] testData, int testDataInde */ private boolean verifyAppend(byte[] testData, Path testFile) { - FSDataInputStream srcStream = null; - try { + try(FSDataInputStream srcStream = fs.open(testFile)) { - srcStream = fs.open(testFile); int baseBufferSize = 2048; int testDataSize = testData.length; int testDataIndex = 0; @@ -140,14 +140,6 @@ private boolean verifyAppend(byte[] testData, Path testFile) { return true; } catch(Exception ex) { return false; - } finally { - if (srcStream != null) { - try { - srcStream.close(); - } catch(IOException ioe) { - // Swallowing - } - } } } @@ -161,18 +153,18 @@ public void testSingleAppend() throws Throwable{ FSDataOutputStream appendStream = null; try { int baseDataSize = 50; - byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, TEST_PATH); + byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, testPath); int appendDataSize = 20; byte[] appendDataBuffer = getTestData(appendDataSize); - appendStream = fs.append(TEST_PATH, 10); + appendStream = fs.append(testPath, 10); appendStream.write(appendDataBuffer); appendStream.close(); byte[] testData = new byte[baseDataSize + appendDataSize]; System.arraycopy(baseDataBuffer, 0, testData, 0, baseDataSize); System.arraycopy(appendDataBuffer, 0, testData, baseDataSize, appendDataSize); - Assert.assertTrue(verifyAppend(testData, TEST_PATH)); + assertTrue(verifyAppend(testData, testPath)); } finally { if (appendStream != null) { appendStream.close(); @@ -189,15 +181,15 @@ public void testSingleAppendOnEmptyFile() throws Throwable { FSDataOutputStream appendStream = null; try { - createBaseFileWithData(0, TEST_PATH); + createBaseFileWithData(0, testPath); int appendDataSize = 20; byte[] appendDataBuffer = getTestData(appendDataSize); - appendStream = fs.append(TEST_PATH, 10); + appendStream = 
fs.append(testPath, 10); appendStream.write(appendDataBuffer); appendStream.close(); - Assert.assertTrue(verifyAppend(appendDataBuffer, TEST_PATH)); + assertTrue(verifyAppend(appendDataBuffer, testPath)); } finally { if (appendStream != null) { appendStream.close(); @@ -215,11 +207,11 @@ public void testSingleAppenderScenario() throws Throwable { FSDataOutputStream appendStream2 = null; IOException ioe = null; try { - createBaseFileWithData(0, TEST_PATH); - appendStream1 = fs.append(TEST_PATH, 10); + createBaseFileWithData(0, testPath); + appendStream1 = fs.append(testPath, 10); boolean encounteredException = false; try { - appendStream2 = fs.append(TEST_PATH, 10); + appendStream2 = fs.append(testPath, 10); } catch(IOException ex) { encounteredException = true; ioe = ex; @@ -227,7 +219,7 @@ public void testSingleAppenderScenario() throws Throwable { appendStream1.close(); - Assert.assertTrue(encounteredException); + assertTrue(encounteredException); GenericTestUtils.assertExceptionContains("Unable to set Append lease on the Blob", ioe); } finally { if (appendStream1 != null) { @@ -247,7 +239,7 @@ public void testSingleAppenderScenario() throws Throwable { public void testMultipleAppends() throws Throwable { int baseDataSize = 50; - byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, TEST_PATH); + byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, testPath); int appendDataSize = 100; int targetAppendCount = 50; @@ -264,7 +256,7 @@ public void testMultipleAppends() throws Throwable { while (appendCount < targetAppendCount) { byte[] appendDataBuffer = getTestData(appendDataSize); - appendStream = fs.append(TEST_PATH, 30); + appendStream = fs.append(testPath, 30); appendStream.write(appendDataBuffer); appendStream.close(); @@ -273,7 +265,7 @@ public void testMultipleAppends() throws Throwable { appendCount++; } - Assert.assertTrue(verifyAppend(testData, TEST_PATH)); + assertTrue(verifyAppend(testData, testPath)); } finally { if (appendStream != null) { @@ -289,7 +281,7 @@ public void testMultipleAppends() throws Throwable { public void testMultipleAppendsOnSameStream() throws Throwable { int baseDataSize = 50; - byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, TEST_PATH); + byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, testPath); int appendDataSize = 100; int targetAppendCount = 50; byte[] testData = new byte[baseDataSize + (appendDataSize*targetAppendCount)]; @@ -304,7 +296,7 @@ public void testMultipleAppendsOnSameStream() throws Throwable { while (appendCount < targetAppendCount) { - appendStream = fs.append(TEST_PATH, 50); + appendStream = fs.append(testPath, 50); int singleAppendChunkSize = 20; int appendRunSize = 0; @@ -323,7 +315,7 @@ public void testMultipleAppendsOnSameStream() throws Throwable { appendCount++; } - Assert.assertTrue(verifyAppend(testData, TEST_PATH)); + assertTrue(verifyAppend(testData, testPath)); } finally { if (appendStream != null) { appendStream.close(); @@ -346,8 +338,8 @@ public void testFalseConfigurationFlagBehavior() throws Throwable { FSDataOutputStream appendStream = null; try { - createBaseFileWithData(0, TEST_PATH); - appendStream = fs.append(TEST_PATH, 10); + createBaseFileWithData(0, testPath); + appendStream = fs.append(testPath, 10); } finally { if (appendStream != null) { appendStream.close(); @@ -355,8 +347,4 @@ public void testFalseConfigurationFlagBehavior() throws Throwable { } } - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return 
AzureBlobStorageTestAccount.create(); - } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAtomicRenameDirList.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAtomicRenameDirList.java similarity index 78% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAtomicRenameDirList.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAtomicRenameDirList.java index 602c1f7f666..869a31c33a2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAtomicRenameDirList.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAtomicRenameDirList.java @@ -22,24 +22,29 @@ import java.net.URI; import org.apache.hadoop.conf.Configuration; + import org.junit.Test; -public class TestNativeAzureFileSystemAtomicRenameDirList +/** + * Test atomic renaming. + */ +public class ITestNativeAzureFileSystemAtomicRenameDirList extends AbstractWasbTestBase { - private AzureBlobStorageTestAccount testAccount; // HBase-site config controlling HBase root dir private static final String HBASE_ROOT_DIR_CONF_STRING = "hbase.rootdir"; - private static final String HBASE_ROOT_DIR_VALUE_ON_DIFFERENT_FS = "wasb://somedifferentfilesystem.blob.core.windows.net/hbase"; + private static final String HBASE_ROOT_DIR_VALUE_ON_DIFFERENT_FS = + "wasb://somedifferentfilesystem.blob.core.windows.net/hbase"; + @Override protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - testAccount = AzureBlobStorageTestAccount.create(); - return testAccount; + return AzureBlobStorageTestAccount.create(); } @Test - public void testAzureNativeStoreIsAtomicRenameKeyDoesNotThrowNPEOnInitializingWithNonDefaultURI () throws IOException { - NativeAzureFileSystem azureFs = (NativeAzureFileSystem)fs; + public void testAtomicRenameKeyDoesntNPEOnInitializingWithNonDefaultURI() + throws IOException { + NativeAzureFileSystem azureFs = fs; AzureNativeFileSystemStore azureStore = azureFs.getStore(); Configuration conf = fs.getConf(); conf.set(HBASE_ROOT_DIR_CONF_STRING, HBASE_ROOT_DIR_VALUE_ON_DIFFERENT_FS); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorizationWithOwner.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAuthorizationWithOwner.java similarity index 92% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorizationWithOwner.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAuthorizationWithOwner.java index 4bd463355f2..3ec42f0b2b4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorizationWithOwner.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAuthorizationWithOwner.java @@ -24,23 +24,23 @@ import org.apache.hadoop.fs.Path; import org.junit.Test; -import org.junit.Before; + import static org.junit.Assert.assertEquals; /** * Test class that runs wasb authorization tests with owner check enabled. 
*/ -public class TestNativeAzureFileSystemAuthorizationWithOwner +public class ITestNativeAzureFileSystemAuthorizationWithOwner extends TestNativeAzureFileSystemAuthorization { - @Before - public void beforeMethod() { - super.beforeMethod(); + @Override + public void setUp() throws Exception { + super.setUp(); authorizer.init(fs.getConf(), true); } /** - * Test case when owner matches current user + * Test case when owner matches current user. */ @Test public void testOwnerPermissionPositive() throws Throwable { @@ -71,7 +71,7 @@ public void testOwnerPermissionPositive() throws Throwable { } /** - * Negative test case for owner does not match current user + * Negative test case for owner does not match current user. */ @Test public void testOwnerPermissionNegative() throws Throwable { @@ -108,7 +108,7 @@ public Void run() throws Exception { /** * Test to verify that retrieving owner information does not - * throw when file/folder does not exist + * throw when file/folder does not exist. */ @Test public void testRetrievingOwnerDoesNotFailWhenFileDoesNotExist() throws Throwable { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemClientLogging.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemClientLogging.java similarity index 93% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemClientLogging.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemClientLogging.java index 4114e60dd95..f73a7638a3e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemClientLogging.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemClientLogging.java @@ -18,9 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - import java.net.URI; import java.util.StringTokenizer; @@ -36,12 +33,12 @@ * testing with Live Azure storage because Emulator does not have support for * client-side logging. * + * Important: Do not attempt to move off commons-logging. + * The tests will fail. 
*/ -public class TestNativeAzureFileSystemClientLogging +public class ITestNativeAzureFileSystemClientLogging extends AbstractWasbTestBase { - private AzureBlobStorageTestAccount testAccount; - // Core-site config controlling Azure Storage Client logging private static final String KEY_LOGGING_CONF_STRING = "fs.azure.storage.client.logging"; @@ -134,7 +131,6 @@ public void testLoggingDisabled() throws Exception { @Override protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - testAccount = AzureBlobStorageTestAccount.create(); - return testAccount; + return AzureBlobStorageTestAccount.create(); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrencyLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java similarity index 74% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrencyLive.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java index 7c5899d66bf..87cac15d9cd 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrencyLive.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java @@ -35,11 +35,12 @@ /*** * Test class to hold all Live Azure storage concurrency tests. */ -public class TestNativeAzureFileSystemConcurrencyLive +public class ITestNativeAzureFileSystemConcurrencyLive extends AbstractWasbTestBase { private static final int THREAD_COUNT = 102; private static final int TEST_EXECUTION_TIMEOUT = 5000; + @Override protected AzureBlobStorageTestAccount createTestAccount() throws Exception { return AzureBlobStorageTestAccount.create(); @@ -53,7 +54,7 @@ protected AzureBlobStorageTestAccount createTestAccount() throws Exception { */ @Test(timeout = TEST_EXECUTION_TIMEOUT) public void testConcurrentCreateDeleteFile() throws Exception { - Path testFile = new Path("test.dat"); + Path testFile = methodPath(); List tasks = new ArrayList<>(THREAD_COUNT); @@ -128,57 +129,57 @@ public void testConcurrentDeleteFile() throws Exception { } } } -} -abstract class FileSystemTask implements Callable { - private final FileSystem fileSystem; - private final Path path; + abstract class FileSystemTask implements Callable { + private final FileSystem fileSystem; + private final Path path; - protected FileSystem getFileSystem() { - return this.fileSystem; + protected FileSystem getFileSystem() { + return this.fileSystem; + } + + protected Path getFilePath() { + return this.path; + } + + FileSystemTask(FileSystem fs, Path p) { + this.fileSystem = fs; + this.path = p; + } + + public abstract V call() throws Exception; } - protected Path getFilePath() { - return this.path; + class DeleteFileTask extends FileSystemTask { + + DeleteFileTask(FileSystem fs, Path p) { + super(fs, p); + } + + @Override + public Boolean call() throws Exception { + return this.getFileSystem().delete(this.getFilePath(), false); + } } - FileSystemTask(FileSystem fs, Path p) { - this.fileSystem = fs; - this.path = p; - } + class CreateFileTask extends FileSystemTask { + CreateFileTask(FileSystem fs, Path p) { + super(fs, p); + } - public abstract V call() throws Exception; -} + public Void call() throws Exception { + FileSystem fs = getFileSystem(); + Path p = getFilePath(); -class DeleteFileTask extends FileSystemTask { 
+ // Create an empty file and close the stream. + FSDataOutputStream stream = fs.create(p, true); + stream.close(); - DeleteFileTask(FileSystem fs, Path p) { - super(fs, p); - } + // Delete the file. We don't care if delete returns true or false. + // We just want to ensure the file does not exist. + this.getFileSystem().delete(this.getFilePath(), false); - @Override - public Boolean call() throws Exception { - return this.getFileSystem().delete(this.getFilePath(), false); + return null; + } } } - -class CreateFileTask extends FileSystemTask { - CreateFileTask(FileSystem fs, Path p) { - super(fs, p); - } - - public Void call() throws Exception { - FileSystem fs = getFileSystem(); - Path p = getFilePath(); - - // Create an empty file and close the stream. - FSDataOutputStream stream = fs.create(p, true); - stream.close(); - - // Delete the file. We don't care if delete returns true or false. - // We just want to ensure the file does not exist. - this.getFileSystem().delete(this.getFilePath(), false); - - return null; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractEmulator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractEmulator.java similarity index 63% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractEmulator.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractEmulator.java index 217ca81550a..4836fc474e4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractEmulator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractEmulator.java @@ -21,28 +21,45 @@ import static org.junit.Assume.assumeNotNull; import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.junit.After; -import org.junit.Before; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -public class TestNativeAzureFileSystemContractEmulator extends +import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TestName; + +/** + * Run the {@code FileSystemContractBaseTest} tests against the emulator + */ +public class ITestNativeAzureFileSystemContractEmulator extends FileSystemContractBaseTest { private AzureBlobStorageTestAccount testAccount; + private Path basePath; + + @Rule + public TestName methodName = new TestName(); + + private void nameThread() { + Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); + } @Before public void setUp() throws Exception { + nameThread(); testAccount = AzureBlobStorageTestAccount.createForEmulator(); if (testAccount != null) { fs = testAccount.getFileSystem(); } assumeNotNull(fs); + basePath = fs.makeQualified( + AzureTestUtils.createTestPath( + new Path("ITestNativeAzureFileSystemContractEmulator"))); } - @After + @Override public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - fs = null; - } + super.tearDown(); + testAccount = AzureTestUtils.cleanup(testAccount); + fs = null; } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractLive.java similarity index 67% rename from 
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractLive.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractLive.java index b546009426d..d3d1bd85957 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractLive.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractLive.java @@ -21,31 +21,59 @@ import static org.junit.Assume.assumeNotNull; import org.apache.hadoop.fs.FileSystemContractBaseTest; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; + import org.junit.After; import org.junit.Before; import org.junit.Ignore; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TestName; -public class TestNativeAzureFileSystemContractLive extends +/** + * Run the {@link FileSystemContractBaseTest} test suite against azure storage. + */ +public class ITestNativeAzureFileSystemContractLive extends FileSystemContractBaseTest { private AzureBlobStorageTestAccount testAccount; + private Path basePath; + + @Rule + public TestName methodName = new TestName(); + + private void nameThread() { + Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); + } @Before public void setUp() throws Exception { + nameThread(); testAccount = AzureBlobStorageTestAccount.create(); if (testAccount != null) { fs = testAccount.getFileSystem(); } assumeNotNull(fs); + basePath = fs.makeQualified( + AzureTestUtils.createTestPath( + new Path("NativeAzureFileSystemContractLive"))); } - @After + @Override public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - fs = null; - } + super.tearDown(); + testAccount = AzureTestUtils.cleanup(testAccount); + fs = null; + } + + @Override + public Path getTestBaseDir() { + return basePath; + } + + protected int getGlobalTimeout() { + return AzureTestConstants.AZURE_TEST_TIMEOUT; } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractPageBlobLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractPageBlobLive.java similarity index 70% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractPageBlobLive.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractPageBlobLive.java index 2a88ad27cfe..03e90aa0543 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractPageBlobLive.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractPageBlobLive.java @@ -20,15 +20,31 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.junit.After; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; + import static org.junit.Assume.assumeNotNull; import org.junit.Before; import org.junit.Ignore; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TestName; -public class TestNativeAzureFileSystemContractPageBlobLive extends +/** + * Run the {@link 
FileSystemContractBaseTest} test suite against azure + * storage, after switching the FS using page blobs everywhere. + */ +public class ITestNativeAzureFileSystemContractPageBlobLive extends FileSystemContractBaseTest { private AzureBlobStorageTestAccount testAccount; + private Path basePath; + @Rule + public TestName methodName = new TestName(); + + private void nameThread() { + Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); + } private AzureBlobStorageTestAccount createTestAccount() throws Exception { @@ -46,19 +62,24 @@ private AzureBlobStorageTestAccount createTestAccount() @Before public void setUp() throws Exception { testAccount = createTestAccount(); - if (testAccount != null) { - fs = testAccount.getFileSystem(); - } - assumeNotNull(fs); + assumeNotNull(testAccount); + fs = testAccount.getFileSystem(); + basePath = AzureTestUtils.pathForTests(fs, "filesystemcontractpageblob"); } - @After + @Override public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - fs = null; - } + testAccount = AzureTestUtils.cleanup(testAccount); + fs = null; + } + + protected int getGlobalTimeout() { + return AzureTestConstants.AZURE_TEST_TIMEOUT; + } + + @Override + public Path getTestBaseDir() { + return basePath; } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemLive.java similarity index 87% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemLive.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemLive.java index 6baba33bce0..f969968110e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemLive.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemLive.java @@ -18,10 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - import java.util.concurrent.CountDownLatch; import org.apache.hadoop.io.IOUtils; @@ -33,11 +29,10 @@ import com.microsoft.azure.storage.StorageException; -/* - * Tests the Native Azure file system (WASB) against an actual blob store if - * provided in the environment. +/** + * Tests the Native Azure file system (WASB) against an actual blob store. 
*/ -public class TestNativeAzureFileSystemLive extends +public class ITestNativeAzureFileSystemLive extends NativeAzureFileSystemBaseTest { @Override @@ -48,22 +43,22 @@ protected AzureBlobStorageTestAccount createTestAccount() throws Exception { @Test public void testLazyRenamePendingCanOverwriteExistingFile() throws Exception { - final String SRC_FILE_KEY = "srcFile"; - final String DST_FILE_KEY = "dstFile"; - Path srcPath = new Path(SRC_FILE_KEY); + final String srcFile = "srcFile"; + final String dstFile = "dstFile"; + Path srcPath = path(srcFile); FSDataOutputStream srcStream = fs.create(srcPath); assertTrue(fs.exists(srcPath)); - Path dstPath = new Path(DST_FILE_KEY); + Path dstPath = path(dstFile); FSDataOutputStream dstStream = fs.create(dstPath); assertTrue(fs.exists(dstPath)); - NativeAzureFileSystem nfs = (NativeAzureFileSystem)fs; + NativeAzureFileSystem nfs = fs; final String fullSrcKey = nfs.pathToKey(nfs.makeAbsolute(srcPath)); final String fullDstKey = nfs.pathToKey(nfs.makeAbsolute(dstPath)); nfs.getStoreInterface().rename(fullSrcKey, fullDstKey, true, null); assertTrue(fs.exists(dstPath)); assertFalse(fs.exists(srcPath)); - IOUtils.cleanup(null, srcStream); - IOUtils.cleanup(null, dstStream); + IOUtils.cleanupWithLogger(null, srcStream); + IOUtils.cleanupWithLogger(null, dstStream); } /** * Tests fs.delete() function to delete a blob when another blob is holding a @@ -77,12 +72,11 @@ public void testLazyRenamePendingCanOverwriteExistingFile() public void testDeleteThrowsExceptionWithLeaseExistsErrorMessage() throws Exception { LOG.info("Starting test"); - final String FILE_KEY = "fileWithLease"; // Create the file - Path path = new Path(FILE_KEY); + Path path = methodPath(); fs.create(path); - assertTrue(fs.exists(path)); - NativeAzureFileSystem nfs = (NativeAzureFileSystem)fs; + assertPathExists("test file", path); + NativeAzureFileSystem nfs = fs; final String fullKey = nfs.pathToKey(nfs.makeAbsolute(path)); final AzureNativeFileSystemStore store = nfs.getStore(); @@ -142,7 +136,7 @@ public void run() { store.delete(fullKey); // At this point file SHOULD BE DELETED - assertFalse(fs.exists(path)); + assertPathDoesNotExist("Leased path", path); } /** @@ -153,7 +147,7 @@ public void run() { */ @Test public void testIsPageBlobKey() { - AzureNativeFileSystemStore store = ((NativeAzureFileSystem) fs).getStore(); + AzureNativeFileSystemStore store = fs.getStore(); // Use literal strings so it's easier to understand the tests. // In case the constant changes, we want to know about it so we can update this test. @@ -184,7 +178,7 @@ public void testIsPageBlobKey() { @Test public void testIsAtomicRenameKey() { - AzureNativeFileSystemStore store = ((NativeAzureFileSystem) fs).getStore(); + AzureNativeFileSystemStore store = fs.getStore(); // We want to know if the default configuration changes so we can fix // this test. 
@@ -225,15 +219,15 @@ public void testIsAtomicRenameKey() { @Test public void testMkdirOnExistingFolderWithLease() throws Exception { SelfRenewingLease lease; - final String FILE_KEY = "folderWithLease"; // Create the folder - fs.mkdirs(new Path(FILE_KEY)); - NativeAzureFileSystem nfs = (NativeAzureFileSystem) fs; - String fullKey = nfs.pathToKey(nfs.makeAbsolute(new Path(FILE_KEY))); + Path path = methodPath(); + fs.mkdirs(path); + NativeAzureFileSystem nfs = fs; + String fullKey = nfs.pathToKey(nfs.makeAbsolute(path)); AzureNativeFileSystemStore store = nfs.getStore(); // Acquire the lease on the folder lease = store.acquireLease(fullKey); - assertTrue(lease.getLeaseID() != null); + assertNotNull("lease ID", lease.getLeaseID()); // Try to create the same folder store.storeEmptyFolder(fullKey, nfs.createPermissionStatus(FsPermission.getDirDefault())); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperationsLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutOfBandAzureBlobOperationsLive.java similarity index 90% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperationsLive.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutOfBandAzureBlobOperationsLive.java index 60b01c616eb..b63aaf0b680 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperationsLive.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutOfBandAzureBlobOperationsLive.java @@ -18,40 +18,22 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertTrue; -import static org.junit.Assume.assumeNotNull; - import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.UserGroupInformation; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import com.microsoft.azure.storage.blob.BlobOutputStream; import com.microsoft.azure.storage.blob.CloudBlockBlob; -public class TestOutOfBandAzureBlobOperationsLive { - private FileSystem fs; - private AzureBlobStorageTestAccount testAccount; +/** + * Live blob operations.
+ */ +public class ITestOutOfBandAzureBlobOperationsLive extends AbstractWasbTestBase { - @Before - public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.create(); - if (testAccount != null) { - fs = testAccount.getFileSystem(); - } - assumeNotNull(testAccount); - } - - @After - public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - fs = null; - } + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create(); } // scenario for this particular test described at MONARCH-HADOOP-764 diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestReadAndSeekPageBlobAfterWrite.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestReadAndSeekPageBlobAfterWrite.java similarity index 69% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestReadAndSeekPageBlobAfterWrite.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestReadAndSeekPageBlobAfterWrite.java index 41b8386fac8..f2af116330f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestReadAndSeekPageBlobAfterWrite.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestReadAndSeekPageBlobAfterWrite.java @@ -18,37 +18,33 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assume.assumeNotNull; - import java.io.IOException; import java.io.OutputStream; import java.util.Arrays; import java.util.Random; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.AzureException; +import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest; import org.apache.hadoop.util.Time; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils .*; /** * Write data into a page blob and verify you can read back all of it * or just a part of it. */ -public class TestReadAndSeekPageBlobAfterWrite { - private static final Log LOG = LogFactory.getLog(TestReadAndSeekPageBlobAfterWrite.class); +public class ITestReadAndSeekPageBlobAfterWrite extends AbstractAzureScaleTest { + private static final Logger LOG = + LoggerFactory.getLogger(ITestReadAndSeekPageBlobAfterWrite.class); private FileSystem fs; - private AzureBlobStorageTestAccount testAccount; private byte[] randomData; // Page blob physical page size @@ -63,35 +59,28 @@ public class TestReadAndSeekPageBlobAfterWrite { // A key with a prefix under /pageBlobs, which for the test file system will // force use of a page blob. 
private static final String KEY = "/pageBlobs/file.dat"; - private static final Path PATH = new Path(KEY); // path of page blob file to read and write - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } + // path of page blob file to read and write + private Path blobPath; - @Before + @Override public void setUp() throws Exception { - testAccount = createTestAccount(); - if (testAccount != null) { - fs = testAccount.getFileSystem(); - } - assumeNotNull(testAccount); - + super.setUp(); + fs = getTestAccount().getFileSystem(); // Make sure we are using an integral number of pages. assertEquals(0, MAX_BYTES % PAGE_SIZE); // load an in-memory array of random data randomData = new byte[PAGE_SIZE * MAX_PAGES]; rand.nextBytes(randomData); + + blobPath = blobPath("ITestReadAndSeekPageBlobAfterWrite"); } - @After + @Override public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - fs = null; - } + deleteQuietly(fs, blobPath, true); + super.tearDown(); } /** @@ -101,9 +90,9 @@ public void tearDown() throws Exception { @Test public void testIsPageBlobFileName() { AzureNativeFileSystemStore store = ((NativeAzureFileSystem) fs).getStore(); - String[] a = KEY.split("/"); + String[] a = blobPath.toUri().getPath().split("/"); String key2 = a[1] + "/"; - assertTrue(store.isPageBlobKey(key2)); + assertTrue("Not a page blob: " + blobPath, store.isPageBlobKey(key2)); } /** @@ -114,7 +103,7 @@ public void testIsPageBlobFileName() { public void testReadAfterWriteRandomData() throws IOException { // local shorthand - final int PDS = PAGE_DATA_SIZE; + final int pds = PAGE_DATA_SIZE; // Test for sizes at and near page boundaries int[] dataSizes = { @@ -124,13 +113,13 @@ public void testReadAfterWriteRandomData() throws IOException { // Near first physical page boundary (because the implementation // stores PDS + the page header size bytes on each page). - PDS - 1, PDS, PDS + 1, PDS + 2, PDS + 3, + pds - 1, pds, pds + 1, pds + 2, pds + 3, // near second physical page boundary - (2 * PDS) - 1, (2 * PDS), (2 * PDS) + 1, (2 * PDS) + 2, (2 * PDS) + 3, + (2 * pds) - 1, (2 * pds), (2 * pds) + 1, (2 * pds) + 2, (2 * pds) + 3, // near tenth physical page boundary - (10 * PDS) - 1, (10 * PDS), (10 * PDS) + 1, (10 * PDS) + 2, (10 * PDS) + 3, + (10 * pds) - 1, (10 * pds), (10 * pds) + 1, (10 * pds) + 2, (10 * pds) + 3, // test one big size, >> 4MB (an internal buffer size in the code) MAX_BYTES @@ -152,7 +141,7 @@ private void testReadAfterWriteRandomData(int size) throws IOException { */ private void readRandomDataAndVerify(int size) throws AzureException, IOException { byte[] b = new byte[size]; - FSDataInputStream stream = fs.open(PATH); + FSDataInputStream stream = fs.open(blobPath); int bytesRead = stream.read(b); stream.close(); assertEquals(bytesRead, size); @@ -176,7 +165,7 @@ private boolean comparePrefix(byte[] a, byte[] b, int size) { // Write a specified amount of random data to the file path for this test class. 
private void writeRandomData(int size) throws IOException { - OutputStream output = fs.create(PATH); + OutputStream output = fs.create(blobPath); output.write(randomData, 0, size); output.close(); } @@ -190,43 +179,45 @@ public void testPageBlobSeekAndReadAfterWrite() throws IOException { writeRandomData(PAGE_SIZE * MAX_PAGES); int recordSize = 100; byte[] b = new byte[recordSize]; - FSDataInputStream stream = fs.open(PATH); - // Seek to a boundary around the middle of the 6th page - int seekPosition = 5 * PAGE_SIZE + 250; - stream.seek(seekPosition); - // Read a record's worth of bytes and verify results - int bytesRead = stream.read(b); - verifyReadRandomData(b, bytesRead, seekPosition, recordSize); + try(FSDataInputStream stream = fs.open(blobPath)) { + // Seek to a boundary around the middle of the 6th page + int seekPosition = 5 * PAGE_SIZE + 250; + stream.seek(seekPosition); - // Seek to another spot and read a record greater than a page - seekPosition = 10 * PAGE_SIZE + 250; - stream.seek(seekPosition); - recordSize = 1000; - b = new byte[recordSize]; - bytesRead = stream.read(b); - verifyReadRandomData(b, bytesRead, seekPosition, recordSize); + // Read a record's worth of bytes and verify results + int bytesRead = stream.read(b); + verifyReadRandomData(b, bytesRead, seekPosition, recordSize); - // Read the last 100 bytes of the file - recordSize = 100; - seekPosition = PAGE_SIZE * MAX_PAGES - recordSize; - stream.seek(seekPosition); - b = new byte[recordSize]; - bytesRead = stream.read(b); - verifyReadRandomData(b, bytesRead, seekPosition, recordSize); + // Seek to another spot and read a record greater than a page + seekPosition = 10 * PAGE_SIZE + 250; + stream.seek(seekPosition); + recordSize = 1000; + b = new byte[recordSize]; + bytesRead = stream.read(b); + verifyReadRandomData(b, bytesRead, seekPosition, recordSize); - // Read past the end of the file and we should get only partial data. - recordSize = 100; - seekPosition = PAGE_SIZE * MAX_PAGES - recordSize + 50; - stream.seek(seekPosition); - b = new byte[recordSize]; - bytesRead = stream.read(b); - assertEquals(50, bytesRead); + // Read the last 100 bytes of the file + recordSize = 100; + seekPosition = PAGE_SIZE * MAX_PAGES - recordSize; + stream.seek(seekPosition); + b = new byte[recordSize]; + bytesRead = stream.read(b); + verifyReadRandomData(b, bytesRead, seekPosition, recordSize); - // compare last 50 bytes written with those read - byte[] tail = Arrays.copyOfRange(randomData, seekPosition, randomData.length); - assertTrue(comparePrefix(tail, b, 50)); + // Read past the end of the file and we should get only partial data. + recordSize = 100; + seekPosition = PAGE_SIZE * MAX_PAGES - recordSize + 50; + stream.seek(seekPosition); + b = new byte[recordSize]; + bytesRead = stream.read(b); + assertEquals(50, bytesRead); + + // compare last 50 bytes written with those read + byte[] tail = Arrays.copyOfRange(randomData, seekPosition, randomData.length); + assertTrue(comparePrefix(tail, b, 50)); + } } // Verify that reading a record of data after seeking gives the expected data. @@ -253,16 +244,14 @@ public void testManySmallWritesWithHFlush() throws IOException { * The syncInterval is the number of writes after which to call hflush to * force the data to storage. 
*/ - private void writeAndReadOneFile(int numWrites, int recordLength, int syncInterval) throws IOException { - final int NUM_WRITES = numWrites; - final int RECORD_LENGTH = recordLength; - final int SYNC_INTERVAL = syncInterval; + private void writeAndReadOneFile(int numWrites, + int recordLength, int syncInterval) throws IOException { // A lower bound on the minimum time we think it will take to do // a write to Azure storage. final long MINIMUM_EXPECTED_TIME = 20; - LOG.info("Writing " + NUM_WRITES * RECORD_LENGTH + " bytes to " + PATH.getName()); - FSDataOutputStream output = fs.create(PATH); + LOG.info("Writing " + numWrites * recordLength + " bytes to " + blobPath.getName()); + FSDataOutputStream output = fs.create(blobPath); int writesSinceHFlush = 0; try { @@ -270,11 +259,11 @@ private void writeAndReadOneFile(int numWrites, int recordLength, int syncInterv // to test concurrent execution gates. output.flush(); output.hflush(); - for (int i = 0; i < NUM_WRITES; i++) { - output.write(randomData, i * RECORD_LENGTH, RECORD_LENGTH); + for (int i = 0; i < numWrites; i++) { + output.write(randomData, i * recordLength, recordLength); writesSinceHFlush++; output.flush(); - if ((i % SYNC_INTERVAL) == 0) { + if ((i % syncInterval) == 0) { output.hflush(); writesSinceHFlush = 0; } @@ -293,8 +282,8 @@ private void writeAndReadOneFile(int numWrites, int recordLength, int syncInterv } // Read the data back and check it. - FSDataInputStream stream = fs.open(PATH); - int SIZE = NUM_WRITES * RECORD_LENGTH; + FSDataInputStream stream = fs.open(blobPath); + int SIZE = numWrites * recordLength; byte[] b = new byte[SIZE]; try { stream.seek(0); @@ -305,7 +294,7 @@ private void writeAndReadOneFile(int numWrites, int recordLength, int syncInterv } // delete the file - fs.delete(PATH, false); + fs.delete(blobPath, false); } // Test writing to a large file repeatedly as a stress test. @@ -324,32 +313,29 @@ public void testLargeFileStress() throws IOException { // Write to a file repeatedly to verify that it extends. // The page blob file should start out at 128MB and finish at 256MB. - @Test(timeout=300000) public void testFileSizeExtension() throws IOException { final int writeSize = 1024 * 1024; final int numWrites = 129; final byte dataByte = 5; byte[] data = new byte[writeSize]; Arrays.fill(data, dataByte); - FSDataOutputStream output = fs.create(PATH); - try { + try (FSDataOutputStream output = fs.create(blobPath)) { for (int i = 0; i < numWrites; i++) { output.write(data); output.hflush(); LOG.debug("total writes = " + (i + 1)); } - } finally { - output.close(); } // Show that we wrote more than the default page blob file size. assertTrue(numWrites * writeSize > PageBlobOutputStream.PAGE_BLOB_MIN_SIZE); // Verify we can list the new size. That will prove we expanded the file. 
- FileStatus[] status = fs.listStatus(PATH); - assertTrue(status[0].getLen() == numWrites * writeSize); - LOG.debug("Total bytes written to " + PATH + " = " + status[0].getLen()); - fs.delete(PATH, false); + FileStatus[] status = fs.listStatus(blobPath); + assertEquals("File size hasn't changed " + status, + numWrites * writeSize, status[0].getLen()); + LOG.debug("Total bytes written to " + blobPath + " = " + status[0].getLen()); + fs.delete(blobPath, false); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbRemoteCallHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbRemoteCallHelper.java similarity index 96% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbRemoteCallHelper.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbRemoteCallHelper.java index 8aad9e9ecfc..062bc36da4b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbRemoteCallHelper.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbRemoteCallHelper.java @@ -35,7 +35,6 @@ import org.hamcrest.Description; import org.hamcrest.TypeSafeMatcher; import org.junit.Assume; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -53,9 +52,9 @@ import static org.mockito.Mockito.times; /** - * Test class to hold all WasbRemoteCallHelper tests + * Test class to hold all WasbRemoteCallHelper tests. */ -public class TestWasbRemoteCallHelper +public class ITestWasbRemoteCallHelper extends AbstractWasbTestBase { public static final String EMPTY_STRING = ""; private static final int INVALID_HTTP_STATUS_CODE_999 = 999; @@ -68,23 +67,21 @@ protected AzureBlobStorageTestAccount createTestAccount() throws Exception { return AzureBlobStorageTestAccount.create(conf); } - @Before - public void beforeMethod() { + @Override + public void setUp() throws Exception { + super.setUp(); boolean useSecureMode = fs.getConf().getBoolean(KEY_USE_SECURE_MODE, false); - boolean useAuthorization = fs.getConf().getBoolean(NativeAzureFileSystem.KEY_AZURE_AUTHORIZATION, false); + boolean useAuthorization = fs.getConf() + .getBoolean(NativeAzureFileSystem.KEY_AZURE_AUTHORIZATION, false); Assume.assumeTrue("Test valid when both SecureMode and Authorization are enabled .. skipping", useSecureMode && useAuthorization); - - Assume.assumeTrue( - useSecureMode && useAuthorization - ); } @Rule public ExpectedException expectedEx = ExpectedException.none(); /** - * Test invalid status-code + * Test invalid status-code. * @throws Throwable */ @Test // (expected = WasbAuthorizationException.class) @@ -95,15 +92,17 @@ public void testInvalidStatusCode() throws Throwable { // set up mocks HttpClient mockHttpClient = Mockito.mock(HttpClient.class); HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpClient.execute(Mockito.any())).thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()).thenReturn(newStatusLine(INVALID_HTTP_STATUS_CODE_999)); + Mockito.when(mockHttpClient.execute(Mockito.any())) + .thenReturn(mockHttpResponse); + Mockito.when(mockHttpResponse.getStatusLine()) + .thenReturn(newStatusLine(INVALID_HTTP_STATUS_CODE_999)); // finished setting up mocks performop(mockHttpClient); } /** - * Test invalid Content-Type + * Test invalid Content-Type. 
* @throws Throwable */ @Test // (expected = WasbAuthorizationException.class) @@ -124,7 +123,7 @@ public void testInvalidContentType() throws Throwable { } /** - * Test missing Content-Length + * Test missing Content-Length. * @throws Throwable */ @Test // (expected = WasbAuthorizationException.class) @@ -145,7 +144,7 @@ public void testMissingContentLength() throws Throwable { } /** - * Test Content-Length exceeds max + * Test Content-Length exceeds max. * @throws Throwable */ @Test // (expected = WasbAuthorizationException.class) @@ -191,7 +190,7 @@ public void testInvalidContentLengthValue() throws Throwable { } /** - * Test valid JSON response + * Test valid JSON response. * @throws Throwable */ @Test @@ -220,7 +219,7 @@ public void testValidJSONResponse() throws Throwable { } /** - * Test malformed JSON response + * Test malformed JSON response. * @throws Throwable */ @Test // (expected = WasbAuthorizationException.class) @@ -250,7 +249,7 @@ public void testMalFormedJSONResponse() throws Throwable { } /** - * Test valid JSON response failure response code + * Test valid JSON response failure response code. * @throws Throwable */ @Test // (expected = WasbAuthorizationException.class) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbUriAndConfiguration.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java similarity index 98% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbUriAndConfiguration.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java index 672ed9c2e8a..bee02206d60 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbUriAndConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java @@ -19,11 +19,6 @@ package org.apache.hadoop.fs.azure; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import static org.junit.Assume.assumeNotNull; import java.io.ByteArrayInputStream; @@ -36,6 +31,7 @@ import java.util.EnumSet; import java.io.File; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.alias.CredentialProvider; import org.apache.hadoop.security.alias.CredentialProviderFactory; @@ -57,7 +53,7 @@ import com.microsoft.azure.storage.blob.CloudBlobContainer; import com.microsoft.azure.storage.blob.CloudBlockBlob; -public class TestWasbUriAndConfiguration { +public class ITestWasbUriAndConfiguration extends AbstractWasbTestWithTimeout { private static final int FILE_SIZE = 4096; private static final String PATH_DELIMITER = "/"; @@ -73,10 +69,7 @@ public class TestWasbUriAndConfiguration { @After public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - } + testAccount = AzureTestUtils.cleanupTestAccount(testAccount); } @Before diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockWasbAuthorizerImpl.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockWasbAuthorizerImpl.java index 9fbab490726..73544998d30 100644 --- 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockWasbAuthorizerImpl.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockWasbAuthorizerImpl.java @@ -38,11 +38,12 @@ public class MockWasbAuthorizerImpl implements WasbAuthorizerInterface { private boolean performOwnerMatch; private CachingAuthorizer cache; - // The full qualified URL to the root directory + // The full qualified URL to the root directory private String qualifiedPrefixUrl; public MockWasbAuthorizerImpl(NativeAzureFileSystem fs) { - qualifiedPrefixUrl = new Path("/").makeQualified(fs.getUri(), fs.getWorkingDirectory()) + qualifiedPrefixUrl = new Path("/").makeQualified(fs.getUri(), + fs.getWorkingDirectory()) .toString().replaceAll("/$", ""); cache = new CachingAuthorizer<>(TimeUnit.MINUTES.convert(5L, TimeUnit.MINUTES), "AUTHORIZATION"); } @@ -64,19 +65,23 @@ public void init(Configuration conf, boolean matchOwner) { public void addAuthRule(String wasbAbsolutePath, String accessType, boolean access) { - wasbAbsolutePath = qualifiedPrefixUrl + wasbAbsolutePath; - AuthorizationComponent component = wasbAbsolutePath.endsWith("*") - ? new AuthorizationComponent("^" + wasbAbsolutePath.replace("*", ".*"), accessType) + wasbAbsolutePath = qualifiedPrefixUrl + wasbAbsolutePath; + AuthorizationComponent component = wasbAbsolutePath.endsWith("*") + ? new AuthorizationComponent("^" + wasbAbsolutePath.replace("*", ".*"), + accessType) : new AuthorizationComponent(wasbAbsolutePath, accessType); this.authRules.put(component, access); } @Override - public boolean authorize(String wasbAbsolutePath, String accessType, String owner) + public boolean authorize(String wasbAbsolutePath, + String accessType, + String owner) throws WasbAuthorizationException { - if (wasbAbsolutePath.endsWith(NativeAzureFileSystem.FolderRenamePending.SUFFIX)) { + if (wasbAbsolutePath.endsWith( + NativeAzureFileSystem.FolderRenamePending.SUFFIX)) { return true; } @@ -108,20 +113,23 @@ private boolean authorizeInternal(String wasbAbsolutePath, String accessType, St // In case of root("/"), owner match does not happen because owner is returned as empty string. // we try to force owner match just for purpose of tests to make sure all operations work seemlessly with owner. if (this.performOwnerMatch - && StringUtils.equalsIgnoreCase(wasbAbsolutePath, qualifiedPrefixUrl + "/")) { + && StringUtils.equalsIgnoreCase(wasbAbsolutePath, + qualifiedPrefixUrl + "/")) { owner = currentUserShortName; } boolean shouldEvaluateOwnerAccess = owner != null && !owner.isEmpty() - && this.performOwnerMatch; + && this.performOwnerMatch; - boolean isOwnerMatch = StringUtils.equalsIgnoreCase(currentUserShortName, owner); + boolean isOwnerMatch = StringUtils.equalsIgnoreCase(currentUserShortName, + owner); AuthorizationComponent component = new AuthorizationComponent(wasbAbsolutePath, accessType); if (authRules.containsKey(component)) { - return shouldEvaluateOwnerAccess ? isOwnerMatch && authRules.get(component) : authRules.get(component); + return shouldEvaluateOwnerAccess ? 
isOwnerMatch && authRules.get( + component) : authRules.get(component); } else { // Regex-pattern match if we don't have a straight match for (Map.Entry entry : authRules.entrySet()) { @@ -129,8 +137,11 @@ private boolean authorizeInternal(String wasbAbsolutePath, String accessType, St String keyPath = key.getWasbAbsolutePath(); String keyAccess = key.getAccessType(); - if (keyPath.endsWith("*") && Pattern.matches(keyPath, wasbAbsolutePath) && keyAccess.equals(accessType)) { - return shouldEvaluateOwnerAccess ? isOwnerMatch && entry.getValue() : entry.getValue(); + if (keyPath.endsWith("*") && Pattern.matches(keyPath, wasbAbsolutePath) + && keyAccess.equals(accessType)) { + return shouldEvaluateOwnerAccess + ? isOwnerMatch && entry.getValue() + : entry.getValue(); } } return false; @@ -141,47 +152,47 @@ public void deleteAllAuthRules() { authRules.clear(); cache.clear(); } + + private static class AuthorizationComponent { + + private final String wasbAbsolutePath; + private final String accessType; + + AuthorizationComponent(String wasbAbsolutePath, + String accessType) { + this.wasbAbsolutePath = wasbAbsolutePath; + this.accessType = accessType; + } + + @Override + public int hashCode() { + return this.wasbAbsolutePath.hashCode() ^ this.accessType.hashCode(); + } + + @Override + public boolean equals(Object obj) { + + if (obj == this) { + return true; + } + + if (obj == null + || !(obj instanceof AuthorizationComponent)) { + return false; + } + + return ((AuthorizationComponent) obj). + getWasbAbsolutePath().equals(this.wasbAbsolutePath) + && ((AuthorizationComponent) obj). + getAccessType().equals(this.accessType); + } + + public String getWasbAbsolutePath() { + return this.wasbAbsolutePath; + } + + public String getAccessType() { + return accessType; + } + } } - -class AuthorizationComponent { - - private String wasbAbsolutePath; - private String accessType; - - public AuthorizationComponent(String wasbAbsolutePath, - String accessType) { - this.wasbAbsolutePath = wasbAbsolutePath; - this.accessType = accessType; - } - - @Override - public int hashCode() { - return this.wasbAbsolutePath.hashCode() ^ this.accessType.hashCode(); - } - - @Override - public boolean equals(Object obj) { - - if (obj == this) { - return true; - } - - if (obj == null - || !(obj instanceof AuthorizationComponent)) { - return false; - } - - return ((AuthorizationComponent)obj). - getWasbAbsolutePath().equals(this.wasbAbsolutePath) - && ((AuthorizationComponent)obj). 
- getAccessType().equals(this.accessType); - } - - public String getWasbAbsolutePath() { - return this.wasbAbsolutePath; - } - - public String getAccessType() { - return accessType; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java index 177477c61bd..726b5049b4c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java @@ -18,12 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileNotFoundException; @@ -47,16 +41,18 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.UserGroupInformation; import org.junit.Test; -import org.apache.hadoop.fs.azure.AzureException; import org.apache.hadoop.fs.azure.NativeAzureFileSystem.FolderRenamePending; import com.microsoft.azure.storage.AccessCondition; import com.microsoft.azure.storage.StorageException; import com.microsoft.azure.storage.blob.CloudBlob; +import static org.apache.hadoop.test.GenericTestUtils.*; + /* * Tests the Native Azure file system (WASB) against an actual blob store if * provided in the environment. @@ -71,15 +67,46 @@ public abstract class NativeAzureFileSystemBaseTest private final long modifiedTimeErrorMargin = 5 * 1000; // Give it +/-5 seconds public static final Log LOG = LogFactory.getLog(NativeAzureFileSystemBaseTest.class); + protected NativeAzureFileSystem fs; + + @Override + public void setUp() throws Exception { + super.setUp(); + fs = getFileSystem(); + } + + /** + * Assert that a path does not exist. + * + * @param message message to include in the assertion failure message + * @param path path in the filesystem + * @throws IOException IO problems + */ + public void assertPathDoesNotExist(String message, + Path path) throws IOException { + ContractTestUtils.assertPathDoesNotExist(fs, message, path); + } + + /** + * Assert that a path exists. 
+ * + * @param message message to include in the assertion failure message + * @param path path in the filesystem + * @throws IOException IO problems + */ + public void assertPathExists(String message, + Path path) throws IOException { + ContractTestUtils.assertPathExists(fs, message, path); + } @Test public void testCheckingNonExistentOneLetterFile() throws Exception { - assertFalse(fs.exists(new Path("/a"))); + assertPathDoesNotExist("one letter file", new Path("/a")); } @Test public void testStoreRetrieveFile() throws Exception { - Path testFile = new Path("unit-test-file"); + Path testFile = methodPath(); writeString(testFile, "Testing"); assertTrue(fs.exists(testFile)); FileStatus status = fs.getFileStatus(testFile); @@ -93,7 +120,7 @@ public void testStoreRetrieveFile() throws Exception { @Test public void testStoreDeleteFolder() throws Exception { - Path testFolder = new Path("storeDeleteFolder"); + Path testFolder = methodPath(); assertFalse(fs.exists(testFolder)); assertTrue(fs.mkdirs(testFolder)); assertTrue(fs.exists(testFolder)); @@ -105,22 +132,22 @@ public void testStoreDeleteFolder() throws Exception { assertEquals(new FsPermission((short) 0755), status.getPermission()); Path innerFile = new Path(testFolder, "innerFile"); assertTrue(fs.createNewFile(innerFile)); - assertTrue(fs.exists(innerFile)); + assertPathExists("inner file", innerFile); assertTrue(fs.delete(testFolder, true)); - assertFalse(fs.exists(innerFile)); - assertFalse(fs.exists(testFolder)); + assertPathDoesNotExist("inner file", innerFile); + assertPathDoesNotExist("testFolder", testFolder); } @Test public void testFileOwnership() throws Exception { - Path testFile = new Path("ownershipTestFile"); + Path testFile = methodPath(); writeString(testFile, "Testing"); testOwnership(testFile); } @Test public void testFolderOwnership() throws Exception { - Path testFolder = new Path("ownershipTestFolder"); + Path testFolder = methodPath(); fs.mkdirs(testFolder); testOwnership(testFolder); } @@ -147,7 +174,7 @@ private static void assertEqualsIgnoreStickyBit(FsPermission expected, @Test public void testFilePermissions() throws Exception { - Path testFile = new Path("permissionTestFile"); + Path testFile = methodPath(); FsPermission permission = FsPermission.createImmutable((short) 644); createEmptyFile(testFile, permission); FileStatus ret = fs.getFileStatus(testFile); @@ -157,7 +184,7 @@ public void testFilePermissions() throws Exception { @Test public void testFolderPermissions() throws Exception { - Path testFolder = new Path("permissionTestFolder"); + Path testFolder = methodPath(); FsPermission permission = FsPermission.createImmutable((short) 644); fs.mkdirs(testFolder, permission); FileStatus ret = fs.getFileStatus(testFolder); @@ -176,9 +203,9 @@ void testDeepFileCreationBase(String testFilePath, String firstDirPath, String m createEmptyFile(testFile, permission); FsPermission rootPerm = fs.getFileStatus(firstDir.getParent()).getPermission(); FsPermission inheritPerm = FsPermission.createImmutable((short)(rootPerm.toShort() | 0300)); - assertTrue(fs.exists(testFile)); - assertTrue(fs.exists(firstDir)); - assertTrue(fs.exists(middleDir)); + assertPathExists("test file", testFile); + assertPathExists("firstDir", firstDir); + assertPathExists("middleDir", middleDir); // verify that the indirectly created directory inherited its permissions from the root directory FileStatus directoryStatus = fs.getFileStatus(middleDir); assertTrue(directoryStatus.isDirectory()); @@ -188,7 +215,7 @@ void 
testDeepFileCreationBase(String testFilePath, String firstDirPath, String m assertFalse(fileStatus.isDirectory()); assertEqualsIgnoreStickyBit(umaskedPermission, fileStatus.getPermission()); assertTrue(fs.delete(firstDir, true)); - assertFalse(fs.exists(testFile)); + assertPathDoesNotExist("deleted file", testFile); // An alternative test scenario would've been to delete the file first, // and then check for the existence of the upper folders still. But that @@ -264,7 +291,7 @@ public void testRenameImplicitFolder() throws Exception { assertTrue(fs.delete(new Path("deep"), true)); } - private static enum RenameFolderVariation { + private enum RenameFolderVariation { CreateFolderAndInnerFile, CreateJustInnerFile, CreateJustFolder } @@ -303,10 +330,10 @@ public void testCopyFromLocalFileSystem() throws Exception { localFs.delete(localFilePath, true); try { writeString(localFs, localFilePath, "Testing"); - Path dstPath = new Path("copiedFromLocal"); + Path dstPath = methodPath(); assertTrue(FileUtil.copy(localFs, localFilePath, fs, dstPath, false, fs.getConf())); - assertTrue(fs.exists(dstPath)); + assertPathExists("copied from local", dstPath); assertEquals("Testing", readString(fs, dstPath)); fs.delete(dstPath, true); } finally { @@ -423,32 +450,32 @@ public void testChineseCharactersFolderRename() throws Exception { @Test public void testReadingDirectoryAsFile() throws Exception { - Path dir = new Path("/x"); + Path dir = methodPath(); assertTrue(fs.mkdirs(dir)); try { fs.open(dir).close(); assertTrue("Should've thrown", false); } catch (FileNotFoundException ex) { - assertEquals("/x is a directory not a file.", ex.getMessage()); + assertExceptionContains("a directory not a file.", ex); } } @Test public void testCreatingFileOverDirectory() throws Exception { - Path dir = new Path("/x"); + Path dir = methodPath(); assertTrue(fs.mkdirs(dir)); try { fs.create(dir).close(); assertTrue("Should've thrown", false); } catch (IOException ex) { - assertEquals("Cannot create file /x; already exists as a directory.", - ex.getMessage()); + assertExceptionContains("Cannot create file", ex); + assertExceptionContains("already exists as a directory", ex); } } @Test public void testInputStreamReadWithZeroSizeBuffer() throws Exception { - Path newFile = new Path("zeroSizeRead"); + Path newFile = methodPath(); OutputStream output = fs.create(newFile); output.write(10); output.close(); @@ -460,7 +487,7 @@ public void testInputStreamReadWithZeroSizeBuffer() throws Exception { @Test public void testInputStreamReadWithBufferReturnsMinusOneOnEof() throws Exception { - Path newFile = new Path("eofRead"); + Path newFile = methodPath(); OutputStream output = fs.create(newFile); output.write(10); output.close(); @@ -482,7 +509,7 @@ public void testInputStreamReadWithBufferReturnsMinusOneOnEof() throws Exception @Test public void testInputStreamReadWithBufferReturnsMinusOneOnEofForLargeBuffer() throws Exception { - Path newFile = new Path("eofRead2"); + Path newFile = methodPath(); OutputStream output = fs.create(newFile); byte[] outputBuff = new byte[97331]; for(int i = 0; i < outputBuff.length; ++i) { @@ -508,7 +535,7 @@ public void testInputStreamReadWithBufferReturnsMinusOneOnEofForLargeBuffer() th @Test public void testInputStreamReadIntReturnsMinusOneOnEof() throws Exception
{ @Test public void testSetPermissionOnFile() throws Exception { - Path newFile = new Path("testPermission"); + Path newFile = methodPath(); OutputStream output = fs.create(newFile); output.write(13); output.close(); @@ -540,14 +567,14 @@ public void testSetPermissionOnFile() throws Exception { // Don't check the file length for page blobs. Only block blobs // provide the actual length of bytes written. - if (!(this instanceof TestNativeAzureFSPageBlobLive)) { + if (!(this instanceof ITestNativeAzureFSPageBlobLive)) { assertEquals(1, newStatus.getLen()); } } @Test public void testSetPermissionOnFolder() throws Exception { - Path newFolder = new Path("testPermission"); + Path newFolder = methodPath(); assertTrue(fs.mkdirs(newFolder)); FsPermission newPermission = new FsPermission((short) 0600); fs.setPermission(newFolder, newPermission); @@ -559,7 +586,7 @@ public void testSetPermissionOnFolder() throws Exception { @Test public void testSetOwnerOnFile() throws Exception { - Path newFile = new Path("testOwner"); + Path newFile = methodPath(); OutputStream output = fs.create(newFile); output.write(13); output.close(); @@ -571,7 +598,7 @@ public void testSetOwnerOnFile() throws Exception { // File length is only reported to be the size of bytes written to the file for block blobs. // So only check it for block blobs, not page blobs. - if (!(this instanceof TestNativeAzureFSPageBlobLive)) { + if (!(this instanceof ITestNativeAzureFSPageBlobLive)) { assertEquals(1, newStatus.getLen()); } fs.setOwner(newFile, null, "newGroup"); @@ -583,7 +610,7 @@ public void testSetOwnerOnFile() throws Exception { @Test public void testSetOwnerOnFolder() throws Exception { - Path newFolder = new Path("testOwner"); + Path newFolder = methodPath(); assertTrue(fs.mkdirs(newFolder)); fs.setOwner(newFolder, "newUser", null); FileStatus newStatus = fs.getFileStatus(newFolder); @@ -594,21 +621,21 @@ public void testSetOwnerOnFolder() throws Exception { @Test public void testModifiedTimeForFile() throws Exception { - Path testFile = new Path("testFile"); + Path testFile = methodPath(); fs.create(testFile).close(); testModifiedTime(testFile); } @Test public void testModifiedTimeForFolder() throws Exception { - Path testFolder = new Path("testFolder"); + Path testFolder = methodPath(); assertTrue(fs.mkdirs(testFolder)); testModifiedTime(testFolder); } @Test public void testFolderLastModifiedTime() throws Exception { - Path parentFolder = new Path("testFolder"); + Path parentFolder = methodPath(); Path innerFile = new Path(parentFolder, "innerfile"); assertTrue(fs.mkdirs(parentFolder)); @@ -983,7 +1010,7 @@ public void testRenameRedoFolderAlreadyDone() throws IOException { // Make sure rename pending file is gone. 
FileStatus[] listed = fs.listStatus(new Path("/")); - assertEquals(1, listed.length); + assertEquals("Pending directory still found", 1, listed.length); assertTrue(listed[0].isDirectory()); } @@ -1681,7 +1708,7 @@ public void run() { assertTrue("Unanticipated exception", false); } } else { - assertTrue("Unknown thread name", false); + fail("Unknown thread name"); } LOG.info(name + " is exiting."); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/RunningLiveWasbTests.txt b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/RunningLiveWasbTests.txt deleted file mode 100644 index 54ba4d822b6..00000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/RunningLiveWasbTests.txt +++ /dev/null @@ -1,22 +0,0 @@ -======================================================================== -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -========================================================================= - -In order to run Windows Azure Storage Blob (WASB) unit tests against a live -Azure Storage account, you need to provide test account details in a configuration -file called azure-test.xml. See hadoop-tools/hadoop-azure/README.txt for details -on configuration, and how to run the tests. \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureConcurrentOutOfBandIoWithSecureMode.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureConcurrentOutOfBandIoWithSecureMode.java deleted file mode 100644 index 687b7855306..00000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestAzureConcurrentOutOfBandIoWithSecureMode.java +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.permission.PermissionStatus; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import java.io.DataInputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Arrays; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; -import static org.junit.Assume.assumeNotNull; - -/** - * Extends TestAzureConcurrentOutOfBandIo in order to run testReadOOBWrites with secure mode - * (fs.azure.secure.mode) both enabled and disabled. - */ -public class TestAzureConcurrentOutOfBandIoWithSecureMode extends TestAzureConcurrentOutOfBandIo { - - // Overridden TestCase methods. - @Before - @Override - public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.createOutOfBandStore( - UPLOAD_BLOCK_SIZE, DOWNLOAD_BLOCK_SIZE, true); - assumeNotNull(testAccount); - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java index 6c499266424..30c102839cb 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java @@ -18,11 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; - import java.io.Closeable; import java.io.IOException; import java.net.URI; @@ -42,7 +37,7 @@ /** * Tests that we put the correct metadata on blobs created through WASB. */ -public class TestBlobMetadata { +public class TestBlobMetadata extends AbstractWasbTestWithTimeout { private AzureBlobStorageTestAccount testAccount; private FileSystem fs; private InMemoryBlockBlobStore backingStore; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobOperationDescriptor.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobOperationDescriptor.java index 07d4ebc8632..aca5f810b4b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobOperationDescriptor.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobOperationDescriptor.java @@ -33,9 +33,6 @@ import java.net.HttpURLConnection; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertEquals; - /** * Tests for BlobOperationDescriptor. */ diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestClientThrottlingAnalyzer.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestClientThrottlingAnalyzer.java index 307e5af5775..c2496d7b925 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestClientThrottlingAnalyzer.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestClientThrottlingAnalyzer.java @@ -21,13 +21,10 @@ import org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer; import org.junit.Test; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertEquals; - /** * Tests for ClientThrottlingAnalyzer. 
*/ -public class TestClientThrottlingAnalyzer { +public class TestClientThrottlingAnalyzer extends AbstractWasbTestWithTimeout { private static final int ANALYSIS_PERIOD = 1000; private static final int ANALYSIS_PERIOD_PLUS_10_PERCENT = ANALYSIS_PERIOD + ANALYSIS_PERIOD / 10; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java index a3f28432b8b..4bf6f04c45d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java @@ -29,7 +29,6 @@ import org.apache.hadoop.util.StringUtils; import org.junit.Assume; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -49,8 +48,8 @@ public class TestNativeAzureFileSystemAuthorization protected MockWasbAuthorizerImpl authorizer; @Override - public Configuration getConfiguration() { - Configuration conf = super.getConfiguration(); + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); conf.set(NativeAzureFileSystem.KEY_AZURE_AUTHORIZATION, "true"); conf.set(RemoteWasbAuthorizerImpl.KEY_REMOTE_AUTH_SERVICE_URLS, "http://localhost/"); conf.set(NativeAzureFileSystem.AZURE_CHOWN_USERLIST_PROPERTY_NAME, "user1 , user2"); @@ -59,13 +58,12 @@ public Configuration getConfiguration() { @Override protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - Configuration conf = getConfiguration(); - return AzureBlobStorageTestAccount.create(conf); + return AzureBlobStorageTestAccount.create(createConfiguration()); } - - @Before - public void beforeMethod() { + @Override + public void setUp() throws Exception { + super.setUp(); boolean useSecureMode = fs.getConf().getBoolean(KEY_USE_SECURE_MODE, false); boolean useAuthorization = fs.getConf().getBoolean(NativeAzureFileSystem.KEY_AZURE_AUTHORIZATION, false); Assume.assumeTrue("Test valid when both SecureMode and Authorization are enabled .. skipping", @@ -76,7 +74,6 @@ public void beforeMethod() { fs.updateWasbAuthorizer(authorizer); } - @Rule public ExpectedException expectedEx = ExpectedException.none(); @@ -95,7 +92,7 @@ protected void allowRecursiveDelete(NativeAzureFileSystem fs, String path) { } /** - * Setup the expected exception class, and exception message that the test is supposed to fail with + * Setup the expected exception class, and exception message that the test is supposed to fail with. */ protected void setExpectedFailureMessage(String operation, Path path) { expectedEx.expect(WasbAuthorizationException.class); @@ -104,7 +101,7 @@ protected void setExpectedFailureMessage(String operation, Path path) { } /** - * Positive test to verify Create access check + * Positive test to verify Create access check. * The file is created directly under an existing folder. * No intermediate folders need to be created. * @throws Throwable @@ -128,7 +125,7 @@ public void testCreateAccessWithoutCreateIntermediateFoldersCheckPositive() thro } /** - * Positive test to verify Create access check + * Positive test to verify Create access check. * The test tries to create a file whose parent is non-existent to ensure that * the intermediate folders between ancestor and direct parent are being created * when proper ranger policies are configured. 
@@ -155,7 +152,7 @@ public void testCreateAccessWithCreateIntermediateFoldersCheckPositive() throws /** - * Negative test to verify that create fails when trying to overwrite an existing file + * Negative test to verify that create fails when trying to overwrite an existing file. * without proper write permissions on the file being overwritten. * @throws Throwable */ @@ -181,7 +178,7 @@ public void testCreateAccessWithOverwriteCheckNegative() throws Throwable { } /** - * Positive test to verify that create succeeds when trying to overwrite an existing file + * Positive test to verify that create succeeds when trying to overwrite an existing file. * when proper write permissions on the file being overwritten are provided. * @throws Throwable */ @@ -232,7 +229,7 @@ public void testCreateAccessCheckNegative() throws Throwable { } /** - * Positive test to verify listStatus access check + * Positive test to verify listStatus access check. * @throws Throwable */ @Test @@ -257,7 +254,7 @@ public void testListAccessCheckPositive() throws Throwable { } /** - * Negative test to verify listStatus access check + * Negative test to verify listStatus access check. * @throws Throwable */ @@ -342,7 +339,7 @@ public void testRenameAccessCheckNegative() throws Throwable { } /** - * Negative test to verify rename access check - the dstFolder disallows rename + * Negative test to verify rename access check - the dstFolder disallows rename. * @throws Throwable */ @Test //(expected=WasbAuthorizationException.class) @@ -373,7 +370,7 @@ public void testRenameAccessCheckNegativeOnDstFolder() throws Throwable { } /** - * Positive test to verify rename access check - the dstFolder allows rename + * Positive test to verify rename access check - the dstFolder allows rename. * @throws Throwable */ @Test @@ -484,7 +481,7 @@ public void testReadAccessCheckNegative() throws Throwable { } /** - * Positive test to verify file delete access check + * Positive test to verify file delete access check. * @throws Throwable */ @Test @@ -506,7 +503,7 @@ public void testFileDeleteAccessCheckPositive() throws Throwable { } /** - * Negative test to verify file delete access check + * Negative test to verify file delete access check. * @throws Throwable */ @Test //(expected=WasbAuthorizationException.class) @@ -544,7 +541,7 @@ public void testFileDeleteAccessCheckNegative() throws Throwable { /** * Positive test to verify file delete access check, with intermediate folders - * Uses wildcard recursive permissions + * Uses wildcard recursive permissions. * @throws Throwable */ @Test @@ -582,7 +579,7 @@ public void testGetFileStatusPositive() throws Throwable { } /** - * Positive test for mkdirs access check + * Positive test for mkdirs access check. * @throws Throwable */ @Test @@ -668,7 +665,7 @@ public void testMkdirsWithExistingHierarchyCheckPositive2() throws Throwable { } } /** - * Negative test for mkdirs access check + * Negative test for mkdirs access check. * @throws Throwable */ @Test //(expected=WasbAuthorizationException.class) @@ -692,7 +689,7 @@ public void testMkdirsCheckNegative() throws Throwable { } /** - * Positive test triple slash format (wasb:///) access check + * Positive test triple slash format (wasb:///) access check. * @throws Throwable */ @Test @@ -708,7 +705,7 @@ public void testListStatusWithTripleSlashCheckPositive() throws Throwable { } /** - * Negative test for setOwner when Authorization is enabled + * Negative test for setOwner when Authorization is enabled. 
*/ @Test public void testSetOwnerThrowsForUnauthorisedUsers() throws Throwable { @@ -744,7 +741,7 @@ public Void run() throws Exception { /** * Test for setOwner when Authorization is enabled and - * the user is specified in chown allowed user list + * the user is specified in chown allowed user list. * */ @Test public void testSetOwnerSucceedsForAuthorisedUsers() throws Throwable { @@ -785,7 +782,7 @@ public Void run() throws Exception { /** * Test for setOwner when Authorization is enabled and - * the userlist is specified as '*' + * the userlist is specified as '*'. * */ @Test public void testSetOwnerSucceedsForAnyUserWhenWildCardIsSpecified() throws Throwable { @@ -829,7 +826,7 @@ public Void run() throws Exception { } /** Test for setOwner throws for illegal setup of chown - * allowed testSetOwnerSucceedsForAuthorisedUsers + * allowed testSetOwnerSucceedsForAuthorisedUsers. */ @Test public void testSetOwnerFailsForIllegalSetup() throws Throwable { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemBlockLocations.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemBlockLocations.java index b2660bbd885..b280cac7137 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemBlockLocations.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemBlockLocations.java @@ -18,8 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; - import java.io.OutputStream; import org.apache.hadoop.conf.Configuration; @@ -29,7 +27,11 @@ import org.apache.hadoop.fs.Path; import org.junit.Test; -public class TestNativeAzureFileSystemBlockLocations { +/** + * Test block location logic. 
+ */ +public class TestNativeAzureFileSystemBlockLocations + extends AbstractWasbTestWithTimeout { @Test public void testNumberOfBlocks() throws Exception { Configuration conf = new Configuration(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrency.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrency.java index cbfc5639ca0..655ae90c6d2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrency.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrency.java @@ -18,11 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - import java.io.OutputStream; import java.io.PrintWriter; import java.io.StringWriter; @@ -33,32 +28,30 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.StringUtils; -import org.junit.After; -import org.junit.Before; import org.junit.Test; -public class TestNativeAzureFileSystemConcurrency { - private AzureBlobStorageTestAccount testAccount; - private FileSystem fs; +public class TestNativeAzureFileSystemConcurrency extends AbstractWasbTestBase { private InMemoryBlockBlobStore backingStore; - @Before + @Override public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.createMock(); - fs = testAccount.getFileSystem(); - backingStore = testAccount.getMockStorage().getBackingStore(); + super.setUp(); + backingStore = getTestAccount().getMockStorage().getBackingStore(); } - @After + @Override public void tearDown() throws Exception { - testAccount.cleanup(); - fs = null; + super.tearDown(); backingStore = null; } + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.createMock(); + } + @Test public void testLinkBlobs() throws Exception { Path filePath = new Path("/inProgress"); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractMocked.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractMocked.java index f458bb3db4c..28092609ac3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractMocked.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractMocked.java @@ -23,6 +23,9 @@ import org.junit.Ignore; import org.junit.Test; +/** + * Mocked testing of FileSystemContractBaseTest. 
+ */ public class TestNativeAzureFileSystemContractMocked extends FileSystemContractBaseTest { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemFileNameCheck.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemFileNameCheck.java index 82eabaa673d..0dfbb372f31 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemFileNameCheck.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemFileNameCheck.java @@ -18,17 +18,11 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - import java.io.IOException; import java.util.HashMap; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.junit.After; -import org.junit.Before; + import org.junit.Test; /** @@ -38,24 +32,18 @@ * creation/rename of files/directories through WASB that have colons in the * names. */ -public class TestNativeAzureFileSystemFileNameCheck { - private FileSystem fs = null; - private AzureBlobStorageTestAccount testAccount = null; +public class TestNativeAzureFileSystemFileNameCheck extends AbstractWasbTestBase { private String root = null; - @Before + @Override public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.createMock(); - fs = testAccount.getFileSystem(); + super.setUp(); root = fs.getUri().toString(); } - @After - public void tearDown() throws Exception { - testAccount.cleanup(); - root = null; - fs = null; - testAccount = null; + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.createMock(); } @Test @@ -138,4 +126,4 @@ private boolean runWasbFsck(Path p) throws Exception { fsck.run(new String[] { p.toString() }); return fsck.getPathNameWarning(); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemMocked.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemMocked.java index aa1e4f79c58..20d45b24d94 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemMocked.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemMocked.java @@ -21,6 +21,10 @@ import java.io.IOException; import org.junit.Ignore; +/** + * Run {@link NativeAzureFileSystemBaseTest} tests against a mocked store, + * skipping tests of unsupported features + */ public class TestNativeAzureFileSystemMocked extends NativeAzureFileSystemBaseTest { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemUploadLogic.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemUploadLogic.java index 4c2df8d68c0..7f63295c133 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemUploadLogic.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemUploadLogic.java @@ -18,41 +18,27 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; - import java.io.ByteArrayInputStream; import java.io.InputStream; import java.io.OutputStream; import 
org.apache.hadoop.fs.Path; -import org.junit.After; -import org.junit.Before; import org.junit.Ignore; import org.junit.Test; /** * Tests for the upload, buffering and flush logic in WASB. */ -public class TestNativeAzureFileSystemUploadLogic { - private AzureBlobStorageTestAccount testAccount; +public class TestNativeAzureFileSystemUploadLogic extends AbstractWasbTestBase { // Just an arbitrary number so that the values I write have a predictable // pattern: 0, 1, 2, .. , 45, 46, 0, 1, 2, ... static final int byteValuePeriod = 47; - @Before - public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.createMock(); - } - - @After - public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - } + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.createMock(); } /** @@ -126,9 +112,9 @@ private void assertDataInStream(InputStream inStream, int expectedSize) * @param expectedSize The expected size of the data in there. */ private void assertDataInFile(Path file, int expectedSize) throws Exception { - InputStream inStream = testAccount.getFileSystem().open(file); - assertDataInStream(inStream, expectedSize); - inStream.close(); + try(InputStream inStream = getFileSystem().open(file)) { + assertDataInStream(inStream, expectedSize); + } } /** @@ -139,7 +125,7 @@ private void assertDataInFile(Path file, int expectedSize) throws Exception { private void assertDataInTempBlob(int expectedSize) throws Exception { // Look for the temporary upload blob in the backing store. InMemoryBlockBlobStore backingStore = - testAccount.getMockStorage().getBackingStore(); + getTestAccount().getMockStorage().getBackingStore(); String tempKey = null; for (String key : backingStore.getKeys()) { if (key.contains(NativeAzureFileSystem.AZURE_TEMP_FOLDER)) { @@ -149,9 +135,10 @@ private void assertDataInTempBlob(int expectedSize) throws Exception { } } assertNotNull(tempKey); - InputStream inStream = new ByteArrayInputStream(backingStore.getContent(tempKey)); - assertDataInStream(inStream, expectedSize); - inStream.close(); + try (InputStream inStream = new ByteArrayInputStream( + backingStore.getContent(tempKey))) { + assertDataInStream(inStream, expectedSize); + } } /** @@ -162,25 +149,30 @@ private void assertDataInTempBlob(int expectedSize) throws Exception { */ private void testConsistencyAfterManyFlushes(FlushFrequencyVariation variation) throws Exception { - Path uploadedFile = new Path("/uploadedFile"); - OutputStream outStream = testAccount.getFileSystem().create(uploadedFile); - final int totalSize = 9123; - int flushPeriod; - switch (variation) { - case BeforeSingleBufferFull: flushPeriod = 300; break; - case AfterSingleBufferFull: flushPeriod = 600; break; - case AfterAllRingBufferFull: flushPeriod = 1600; break; - default: - throw new IllegalArgumentException("Unknown variation: " + variation); - } - for (int i = 0; i < totalSize; i++) { - outStream.write(i % byteValuePeriod); - if ((i + 1) % flushPeriod == 0) { - outStream.flush(); - assertDataInTempBlob(i + 1); + Path uploadedFile = methodPath(); + try { + OutputStream outStream = getFileSystem().create(uploadedFile); + final int totalSize = 9123; + int flushPeriod; + switch (variation) { + case BeforeSingleBufferFull: flushPeriod = 300; break; + case AfterSingleBufferFull: flushPeriod = 600; break; + case AfterAllRingBufferFull: flushPeriod = 1600; break; + default: + throw new 
IllegalArgumentException("Unknown variation: " + variation); } + for (int i = 0; i < totalSize; i++) { + outStream.write(i % byteValuePeriod); + if ((i + 1) % flushPeriod == 0) { + outStream.flush(); + assertDataInTempBlob(i + 1); + } + } + outStream.close(); + assertDataInFile(uploadedFile, totalSize); + } finally { + getFileSystem().delete(uploadedFile, false); + } - outStream.close(); - assertDataInFile(uploadedFile, totalSize); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperations.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperations.java index 544d6ab4fc0..303a89ac4fe 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperations.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperations.java @@ -18,11 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - import java.util.HashMap; import org.apache.hadoop.fs.FileStatus; @@ -37,7 +32,8 @@ * Tests that WASB handles things gracefully when users add blobs to the Azure * Storage container from outside WASB's control. */ -public class TestOutOfBandAzureBlobOperations { +public class TestOutOfBandAzureBlobOperations + extends AbstractWasbTestWithTimeout { private AzureBlobStorageTestAccount testAccount; private FileSystem fs; private InMemoryBlockBlobStore backingStore; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestShellDecryptionKeyProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestShellDecryptionKeyProvider.java index 0bf33d8138b..0334c39a5c3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestShellDecryptionKeyProvider.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestShellDecryptionKeyProvider.java @@ -19,20 +19,23 @@ package org.apache.hadoop.fs.azure; import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows; -import static org.junit.Assert.assertEquals; import java.io.File; import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.junit.Assert; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -public class TestShellDecryptionKeyProvider { - public static final Log LOG = LogFactory - .getLog(TestShellDecryptionKeyProvider.class); +/** + * Windows only tests of shell scripts to provide decryption keys. 
+ */ +public class TestShellDecryptionKeyProvider + extends AbstractWasbTestWithTimeout { + public static final Logger LOG = LoggerFactory + .getLogger(TestShellDecryptionKeyProvider.class); private static File TEST_ROOT_DIR = new File(System.getProperty( "test.build.data", "/tmp"), "TestShellDecryptionKeyProvider"); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbFsck.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbFsck.java index 467424b98c1..9d32fb2e443 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbFsck.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbFsck.java @@ -18,10 +18,6 @@ package org.apache.hadoop.fs.azure; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -32,7 +28,10 @@ import org.junit.Ignore; import org.junit.Test; -public class TestWasbFsck { +/** + * Tests which look at fsck recovery. + */ +public class TestWasbFsck extends AbstractWasbTestWithTimeout { private AzureBlobStorageTestAccount testAccount; private FileSystem fs; private InMemoryBlockBlobStore backingStore; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractAppend.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractAppend.java similarity index 91% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractAppend.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractAppend.java index 8a2341e762d..fd21bd20b2e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractAppend.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractAppend.java @@ -21,10 +21,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.contract.AbstractContractAppendTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -import org.junit.Test; import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; -public class TestAzureNativeContractAppend extends AbstractContractAppendTest { +/** + * Append test, skipping one of them. 
+ */ + +public class ITestAzureNativeContractAppend extends AbstractContractAppendTest { + @Override protected AbstractFSContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); @@ -34,4 +38,4 @@ protected AbstractFSContract createContract(Configuration conf) { public void testRenameFileBeingAppended() throws Throwable { skip("Skipping as renaming an opened file is not supported"); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractCreate.java similarity index 91% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractCreate.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractCreate.java index 531552dad1b..0ac046a3026 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractCreate.java @@ -22,7 +22,11 @@ import org.apache.hadoop.fs.contract.AbstractContractCreateTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -public class TestAzureNativeContractCreate extends AbstractContractCreateTest{ +/** + * Contract test. + */ +public class ITestAzureNativeContractCreate extends AbstractContractCreateTest { + @Override protected AbstractFSContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDelete.java similarity index 91% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDelete.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDelete.java index 5e5c13b1345..4c6dd484a5d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDelete.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDelete.java @@ -22,9 +22,12 @@ import org.apache.hadoop.fs.contract.AbstractContractDeleteTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -public class TestAzureNativeContractDelete extends AbstractContractDeleteTest { +/** + * Contract test. 
+ */ +public class ITestAzureNativeContractDelete extends AbstractContractDeleteTest { @Override protected AbstractFSContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDistCp.java similarity index 70% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDistCp.java index a3750d46ab0..77695706d0b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDistCp.java @@ -19,15 +19,29 @@ package org.apache.hadoop.fs.azure.contract; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azure.integration.AzureTestConstants; import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; + /** * Contract test suite covering WASB integration with DistCp. */ -public class TestAzureNativeContractDistCp extends AbstractContractDistCpTest { +public class ITestAzureNativeContractDistCp extends AbstractContractDistCpTest { + + @Override + protected int getTestTimeoutMillis() { + return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + } @Override protected NativeAzureFileSystemContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); } + + @Override + public void setup() throws Exception { + super.setup(); + assumeScaleTestsEnabled(getContract().getConf()); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractGetFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractGetFileStatus.java similarity index 90% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractGetFileStatus.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractGetFileStatus.java index b0c59eebe53..9c09c0d8e7b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractGetFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractGetFileStatus.java @@ -22,7 +22,12 @@ import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -public class TestAzureNativeContractGetFileStatus extends AbstractContractGetFileStatusTest { +/** + * Contract test. 
+ */ +public class ITestAzureNativeContractGetFileStatus + extends AbstractContractGetFileStatusTest { + @Override protected AbstractFSContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractMkdir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractMkdir.java similarity index 91% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractMkdir.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractMkdir.java index 36df041ebaa..71654b8eca8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractMkdir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractMkdir.java @@ -22,9 +22,12 @@ import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -public class TestAzureNativeContractMkdir extends AbstractContractMkdirTest { +/** + * Contract test. + */ +public class ITestAzureNativeContractMkdir extends AbstractContractMkdirTest { @Override protected AbstractFSContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractOpen.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractOpen.java similarity index 91% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractOpen.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractOpen.java index d5147acc666..0b174e606fa 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractOpen.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractOpen.java @@ -22,9 +22,13 @@ import org.apache.hadoop.fs.contract.AbstractContractOpenTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -public class TestAzureNativeContractOpen extends AbstractContractOpenTest { +/** + * Contract test. 
+ */ +public class ITestAzureNativeContractOpen extends AbstractContractOpenTest { + @Override protected AbstractFSContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractRename.java similarity index 91% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractRename.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractRename.java index 4d8b2b57129..474b874e305 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractRename.java @@ -22,9 +22,13 @@ import org.apache.hadoop.fs.contract.AbstractContractRenameTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -public class TestAzureNativeContractRename extends AbstractContractRenameTest { +/** + * Contract test. + */ +public class ITestAzureNativeContractRename extends AbstractContractRenameTest { + @Override protected AbstractFSContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractSeek.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractSeek.java similarity index 91% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractSeek.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractSeek.java index 30046dcc4e7..673d5f89544 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractSeek.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractSeek.java @@ -22,9 +22,13 @@ import org.apache.hadoop.fs.contract.AbstractContractSeekTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -public class TestAzureNativeContractSeek extends AbstractContractSeekTest{ +/** + * Contract test. 
+ */ +public class ITestAzureNativeContractSeek extends AbstractContractSeekTest{ + @Override protected AbstractFSContract createContract(Configuration conf) { return new NativeAzureFileSystemContract(conf); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/NativeAzureFileSystemContract.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/NativeAzureFileSystemContract.java index 28c13ead7b8..a264acabc54 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/NativeAzureFileSystemContract.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/NativeAzureFileSystemContract.java @@ -18,15 +18,21 @@ package org.apache.hadoop.fs.azure.contract; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.integration.AzureTestUtils; import org.apache.hadoop.fs.contract.AbstractBondedFSContract; +/** + * Azure Contract. Test paths are created using any maven fork + * identifier, if defined. This guarantees paths unique to tests + * running in parallel. + */ public class NativeAzureFileSystemContract extends AbstractBondedFSContract { public static final String CONTRACT_XML = "wasb.xml"; - protected NativeAzureFileSystemContract(Configuration conf) { - super(conf); - //insert the base features + public NativeAzureFileSystemContract(Configuration conf) { + super(conf); //insert the base features addConfResource(CONTRACT_XML); } @@ -34,4 +40,9 @@ protected NativeAzureFileSystemContract(Configuration conf) { public String getScheme() { return "wasb"; } -} \ No newline at end of file + + @Override + public Path getTestPath() { + return AzureTestUtils.createTestPath(super.getTestPath()); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AbstractAzureScaleTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AbstractAzureScaleTest.java new file mode 100644 index 00000000000..062d0733a32 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AbstractAzureScaleTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure.integration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azure.AbstractWasbTestBase; +import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; + +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.*; + +/** + * Scale tests are only executed if the scale profile + * is set; the setup method will check this and skip + * tests if not. 
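For orientation, a hypothetical subclass of this scale-test base class: it inherits the longer timeout and the enabled-check from setUp(), so it only has to supply test methods. The class name, the test body, and the use of getFileSystem()/path()/getOperationCount() are illustrative assumptions, not part of the patch.

    import org.junit.Test;

    public class ITestExampleWasbScale extends AbstractAzureScaleTest {

      @Test
      public void testManyCreates() throws Exception {
        // setUp() has already skipped this test unless the scale-test
        // property (fs.azure.scale.test.enabled) is set
        for (long i = 0; i < getOperationCount(); i++) {
          assertTrue(getFileSystem().createNewFile(path("scale-" + i)));
        }
      }
    }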
+ * + */ +public abstract class AbstractAzureScaleTest + extends AbstractWasbTestBase implements Sizes { + + protected static final Logger LOG = + LoggerFactory.getLogger(AbstractAzureScaleTest.class); + + @Override + protected int getTestTimeoutMillis() { + return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + } + + @Override + public void setUp() throws Exception { + super.setUp(); + LOG.debug("Scale test operation count = {}", getOperationCount()); + assumeScaleTestsEnabled(getConfiguration()); + } + + /** + * Create the test account. + * @return a test account + * @throws Exception on any failure to create the account. + */ + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create(createConfiguration()); + } + + protected long getOperationCount() { + return getConfiguration().getLong(KEY_OPERATION_COUNT, + DEFAULT_OPERATION_COUNT); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java new file mode 100644 index 00000000000..0b72f069410 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure.integration; + +import org.apache.hadoop.fs.Path; + +/** + * Constants for the Azure tests. + */ +public interface AzureTestConstants { + + /** + * Prefix for any cross-filesystem scale test options. + */ + String SCALE_TEST = "scale.test."; + + /** + * Prefix for wasb-specific scale tests. + */ + String AZURE_SCALE_TEST = "fs.azure.scale.test."; + + /** + * Prefix for FS wasb tests. + */ + String TEST_FS_WASB = "test.fs.azure."; + + /** + * Name of the test filesystem. + */ + String TEST_FS_WASB_NAME = TEST_FS_WASB + "name"; + + /** + * Tell tests that they are being executed in parallel: {@value}. + */ + String KEY_PARALLEL_TEST_EXECUTION = "test.parallel.execution"; + + /** + * A property set to true in maven if scale tests are enabled: {@value}. + */ + String KEY_SCALE_TESTS_ENABLED = AZURE_SCALE_TEST + "enabled"; + + /** + * The number of operations to perform: {@value}. + */ + String KEY_OPERATION_COUNT = SCALE_TEST + "operation.count"; + + /** + * The number of directory operations to perform: {@value}. + */ + String KEY_DIRECTORY_COUNT = SCALE_TEST + "directory.count"; + + /** + * The readahead buffer: {@value}. + */ + String KEY_READ_BUFFER_SIZE = AZURE_SCALE_TEST + "read.buffer.size"; + + int DEFAULT_READ_BUFFER_SIZE = 16384; + + /** + * Key for a multi MB test file: {@value}. 
+ */ + String KEY_CSVTEST_FILE = AZURE_SCALE_TEST + "csvfile"; + + /** + * Default path for the multi MB test file: {@value}. + */ + String DEFAULT_CSVTEST_FILE = "wasb://datasets@azuremlsampleexperiments.blob.core.windows.net/network_intrusion_detection.csv"; + + /** + * Name of the property to define the timeout for scale tests: {@value}. + * Measured in seconds. + */ + String KEY_TEST_TIMEOUT = AZURE_SCALE_TEST + "timeout"; + + /** + * Name of the property to define the file size for the huge file + * tests: {@value}. + * Measured in KB; a suffix like "M", or "G" will change the unit. + */ + String KEY_HUGE_FILESIZE = AZURE_SCALE_TEST + "huge.filesize"; + + /** + * Name of the property to define the partition size for the huge file + * tests: {@value}. + * Measured in KB; a suffix like "M", or "G" will change the unit. + */ + String KEY_HUGE_PARTITION_SIZE = AZURE_SCALE_TEST + "huge.partitionsize"; + + /** + * The default huge size is small —full 5GB+ scale tests are something + * to run in long test runs on EC2 VMs. {@value}. + */ + String DEFAULT_HUGE_FILESIZE = "10M"; + + /** + * The default number of operations to perform: {@value}. + */ + long DEFAULT_OPERATION_COUNT = 2005; + + /** + * Default number of directories to create when performing + * directory performance/scale tests. + */ + int DEFAULT_DIRECTORY_COUNT = 2; + + /** + * Default policy on scale tests: {@value}. + */ + boolean DEFAULT_SCALE_TESTS_ENABLED = false; + + /** + * Fork ID passed down from maven if the test is running in parallel. + */ + String TEST_UNIQUE_FORK_ID = "test.unique.fork.id"; + + /** + * Timeout in Milliseconds for standard tests: {@value}. + */ + int AZURE_TEST_TIMEOUT = 10 * 60 * 1000; + + /** + * Timeout in Seconds for Scale Tests: {@value}. + */ + int SCALE_TEST_TIMEOUT_SECONDS = 30 * 60; + + int SCALE_TEST_TIMEOUT_MILLIS = SCALE_TEST_TIMEOUT_SECONDS * 1000; + + + + String ACCOUNT_KEY_PROPERTY_NAME + = "fs.azure.account.key."; + String SAS_PROPERTY_NAME = "fs.azure.sas."; + String TEST_CONFIGURATION_FILE_NAME = "azure-test.xml"; + String TEST_ACCOUNT_NAME_PROPERTY_NAME + = "fs.azure.test.account.name"; + String MOCK_ACCOUNT_NAME + = "mockAccount.blob.core.windows.net"; + String MOCK_CONTAINER_NAME = "mockContainer"; + String WASB_AUTHORITY_DELIMITER = "@"; + String WASB_SCHEME = "wasb"; + String PATH_DELIMITER = "/"; + String AZURE_ROOT_CONTAINER = "$root"; + String MOCK_WASB_URI = "wasb://" + MOCK_CONTAINER_NAME + + WASB_AUTHORITY_DELIMITER + MOCK_ACCOUNT_NAME + "/"; + String USE_EMULATOR_PROPERTY_NAME + = "fs.azure.test.emulator"; + + String KEY_DISABLE_THROTTLING + = "fs.azure.disable.bandwidth.throttling"; + String KEY_READ_TOLERATE_CONCURRENT_APPEND + = "fs.azure.io.read.tolerate.concurrent.append"; + /** + * Path for page blobs: {@value}. + */ + String DEFAULT_PAGE_BLOB_DIRECTORY = "pageBlobs"; + + String DEFAULT_ATOMIC_RENAME_DIRECTORIES + = "/atomicRenameDir1,/atomicRenameDir2"; + + /** + * Base directory for page blobs. 
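These keys are read through the getTestProperty* helpers defined later in this patch, so values can come from the test configuration or be overridden with -D system properties on the maven command line. A rough fragment, assuming a Configuration named conf is in scope and with illustrative variable names; per the suffix-aware parsing, a value such as "10M" resolves to 10 * 1024 * 1024 bytes.

    long hugeFileSize = AzureTestUtils.getTestPropertyBytes(conf,
        AzureTestConstants.KEY_HUGE_FILESIZE,
        AzureTestConstants.DEFAULT_HUGE_FILESIZE);      // e.g. "10M" -> 10485760
    boolean scaleEnabled = AzureTestUtils.getTestPropertyBool(conf,
        AzureTestConstants.KEY_SCALE_TESTS_ENABLED,
        AzureTestConstants.DEFAULT_SCALE_TESTS_ENABLED);
    int timeoutSeconds = AzureTestUtils.getTestPropertyInt(conf,
        AzureTestConstants.KEY_TEST_TIMEOUT,
        AzureTestConstants.SCALE_TEST_TIMEOUT_SECONDS);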
+ */ + Path PAGE_BLOB_DIR = new Path("/" + DEFAULT_PAGE_BLOB_DIRECTORY); +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestUtils.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestUtils.java new file mode 100644 index 00000000000..2fbbcd1758c --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestUtils.java @@ -0,0 +1,479 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure.integration; + +import java.io.IOException; +import java.net.URI; +import java.util.List; + +import org.junit.Assert; +import org.junit.Assume; +import org.junit.internal.AssumptionViolatedException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; +import org.apache.hadoop.fs.azure.NativeAzureFileSystem; + +import static org.apache.hadoop.fs.azure.integration.AzureTestConstants.*; +import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; +import static org.apache.hadoop.test.MetricsAsserts.getLongGauge; +import static org.apache.hadoop.test.MetricsAsserts.getMetrics; + +/** + * Utilities for the Azure tests. Based on {@code S3ATestUtils}, so + * (initially) has unused method. + */ +public final class AzureTestUtils extends Assert { + private static final Logger LOG = LoggerFactory.getLogger( + AzureTestUtils.class); + + /** + * Value to set a system property to (in maven) to declare that + * a property has been unset. + */ + public static final String UNSET_PROPERTY = "unset"; + + /** + * Create the test filesystem. + * + * If the test.fs.wasb.name property is not set, this will + * raise a JUnit assumption exception + * + * @param conf configuration + * @return the FS + * @throws IOException IO Problems + * @throws AssumptionViolatedException if the FS is not named + */ + public static NativeAzureFileSystem createTestFileSystem(Configuration conf) + throws IOException { + + String fsname = conf.getTrimmed(TEST_FS_WASB_NAME, ""); + + boolean liveTest = !StringUtils.isEmpty(fsname); + URI testURI = null; + if (liveTest) { + testURI = URI.create(fsname); + liveTest = testURI.getScheme().equals(WASB_SCHEME); + } + if (!liveTest) { + // Skip the test + throw new AssumptionViolatedException( + "No test filesystem in " + TEST_FS_WASB_NAME); + } + NativeAzureFileSystem fs1 = new NativeAzureFileSystem(); + fs1.initialize(testURI, conf); + return fs1; + } + + /** + * Create a file context for tests. 
+ * + * If the test.fs.wasb.name property is not set, this will + * trigger a JUnit failure. + * + * Multipart purging is enabled. + * @param conf configuration + * @return the FS + * @throws IOException IO Problems + * @throws AssumptionViolatedException if the FS is not named + */ + public static FileContext createTestFileContext(Configuration conf) + throws IOException { + String fsname = conf.getTrimmed(TEST_FS_WASB_NAME, ""); + + boolean liveTest = !StringUtils.isEmpty(fsname); + URI testURI = null; + if (liveTest) { + testURI = URI.create(fsname); + liveTest = testURI.getScheme().equals(WASB_SCHEME); + } + if (!liveTest) { + // This doesn't work with our JUnit 3 style test cases, so instead we'll + // make this whole class not run by default + throw new AssumptionViolatedException("No test filesystem in " + + TEST_FS_WASB_NAME); + } + FileContext fc = FileContext.getFileContext(testURI, conf); + return fc; + } + + /** + * Get a long test property. + *
+ *   1. Look up configuration value (which can pick up core-default.xml),
+ * using {@code defVal} as the default value (if conf != null).
+ *   2. Fetch the system property.
+ *   3. If the system property is not empty or "(unset)":
+ * it overrides the conf value.
+ *
+ * This puts the build properties in charge of everything. It's not a + * perfect design; having maven set properties based on a file, as ant let + * you do, is better for customization. + * + * As to why there's a special (unset) value, see + * {@link http://stackoverflow.com/questions/7773134/null-versus-empty-arguments-in-maven} + * @param conf config: may be null + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + public static long getTestPropertyLong(Configuration conf, + String key, long defVal) { + return Long.valueOf( + getTestProperty(conf, key, Long.toString(defVal))); + } + /** + * Get a test property value in bytes, using k, m, g, t, p, e suffixes. + * {@link org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix#string2long(String)} + *
+ *   1. Look up configuration value (which can pick up core-default.xml),
+ * using {@code defVal} as the default value (if conf != null).
+ *   2. Fetch the system property.
+ *   3. If the system property is not empty or "(unset)":
+ * it overrides the conf value.
+ *
+ * This puts the build properties in charge of everything. It's not a + * perfect design; having maven set properties based on a file, as ant let + * you do, is better for customization. + * + * As to why there's a special (unset) value, see + * {@link http://stackoverflow.com/questions/7773134/null-versus-empty-arguments-in-maven} + * @param conf config: may be null + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + public static long getTestPropertyBytes(Configuration conf, + String key, String defVal) { + return org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix + .string2long(getTestProperty(conf, key, defVal)); + } + + /** + * Get an integer test property; algorithm described in + * {@link #getTestPropertyLong(Configuration, String, long)}. + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + public static int getTestPropertyInt(Configuration conf, + String key, int defVal) { + return (int) getTestPropertyLong(conf, key, defVal); + } + + /** + * Get a boolean test property; algorithm described in + * {@link #getTestPropertyLong(Configuration, String, long)}. + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + public static boolean getTestPropertyBool(Configuration conf, + String key, + boolean defVal) { + return Boolean.valueOf( + getTestProperty(conf, key, Boolean.toString(defVal))); + } + + /** + * Get a string test property. + *
+ *   1. Look up configuration value (which can pick up core-default.xml),
+ * using {@code defVal} as the default value (if conf != null).
+ *   2. Fetch the system property.
+ *   3. If the system property is not empty or "(unset)":
+ * it overrides the conf value.
+ *
+ * This puts the build properties in charge of everything. It's not a + * perfect design; having maven set properties based on a file, as ant let + * you do, is better for customization. + * + * As to why there's a special (unset) value, see + * @see + * Stack Overflow + * @param conf config: may be null + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + + public static String getTestProperty(Configuration conf, + String key, + String defVal) { + String confVal = conf != null + ? conf.getTrimmed(key, defVal) + : defVal; + String propval = System.getProperty(key); + return StringUtils.isNotEmpty(propval) && !UNSET_PROPERTY.equals(propval) + ? propval : confVal; + } + + /** + * Verify the class of an exception. If it is not as expected, rethrow it. + * Comparison is on the exact class, not subclass-of inference as + * offered by {@code instanceof}. + * @param clazz the expected exception class + * @param ex the exception caught + * @return the exception, if it is of the expected class + * @throws Exception the exception passed in. + */ + public static Exception verifyExceptionClass(Class clazz, + Exception ex) + throws Exception { + if (!(ex.getClass().equals(clazz))) { + throw ex; + } + return ex; + } + + /** + * Turn off FS Caching: use if a filesystem with different options from + * the default is required. + * @param conf configuration to patch + */ + public static void disableFilesystemCaching(Configuration conf) { + conf.setBoolean("fs.wasb.impl.disable.cache", true); + } + + /** + * Create a test path, using the value of + * {@link AzureTestUtils#TEST_UNIQUE_FORK_ID} if it is set. + * @param defVal default value + * @return a path + */ + public static Path createTestPath(Path defVal) { + String testUniqueForkId = System.getProperty( + AzureTestConstants.TEST_UNIQUE_FORK_ID); + return testUniqueForkId == null + ? defVal + : new Path("/" + testUniqueForkId, "test"); + } + + /** + * Create a test page blob path using the value of + * {@link AzureTestConstants#TEST_UNIQUE_FORK_ID} if it is set. + * @param filename filename at the end of the path + * @return an absolute path + */ + public static Path blobPathForTests(FileSystem fs, String filename) { + String testUniqueForkId = System.getProperty( + AzureTestConstants.TEST_UNIQUE_FORK_ID); + return fs.makeQualified(new Path(PAGE_BLOB_DIR, + testUniqueForkId == null + ? filename + : (testUniqueForkId + "/" + filename))); + } + + /** + * Create a test path using the value of + * {@link AzureTestConstants#TEST_UNIQUE_FORK_ID} if it is set. + * @param filename filename at the end of the path + * @return an absolute path + */ + public static Path pathForTests(FileSystem fs, String filename) { + String testUniqueForkId = System.getProperty( + AzureTestConstants.TEST_UNIQUE_FORK_ID); + return fs.makeQualified(new Path( + testUniqueForkId == null + ? ("/test/" + filename) + : (testUniqueForkId + "/" + filename))); + } + + /** + * Get a unique fork ID. + * Returns a default value for non-parallel tests. + * @return a string unique for all test VMs running in this maven build. + */ + public static String getForkID() { + return System.getProperty( + AzureTestConstants.TEST_UNIQUE_FORK_ID, "fork-1"); + } + + /** + * Flag to indicate that this test is being executed in parallel. + * This is used by some of the scale tests to validate test time expectations. + * @return true if the build indicates this test is being run in parallel. 
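To make the property precedence concrete, an illustrative fragment using a hypothetical key: the system property wins unless it is empty or the literal "unset", in which case the configuration value (or, failing that, the supplied default) is returned.

    Configuration conf = new Configuration();
    conf.set("example.test.property", "from-conf");

    // no system property defined: the configuration value is returned
    AzureTestUtils.getTestProperty(conf, "example.test.property", "default");   // "from-conf"

    // -Dexample.test.property=from-maven overrides the configuration
    System.setProperty("example.test.property", "from-maven");
    AzureTestUtils.getTestProperty(conf, "example.test.property", "default");   // "from-maven"

    // the sentinel "unset" hands control back to the configuration value
    System.setProperty("example.test.property", "unset");
    AzureTestUtils.getTestProperty(conf, "example.test.property", "default");   // "from-conf"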
+ */ + public static boolean isParallelExecution() { + return Boolean.getBoolean(KEY_PARALLEL_TEST_EXECUTION); + } + + /** + * Asserts that {@code obj} is an instance of {@code expectedClass} using a + * descriptive assertion message. + * @param expectedClass class + * @param obj object to check + */ + public static void assertInstanceOf(Class expectedClass, Object obj) { + Assert.assertTrue(String.format("Expected instance of class %s, but is %s.", + expectedClass, obj.getClass()), + expectedClass.isAssignableFrom(obj.getClass())); + } + + /** + * Builds a comma-separated list of class names. + * @param classes list of classes + * @return comma-separated list of class names + */ + public static > String buildClassListString( + List classes) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < classes.size(); ++i) { + if (i > 0) { + sb.append(','); + } + sb.append(classes.get(i).getName()); + } + return sb.toString(); + } + + /** + * This class should not be instantiated. + */ + private AzureTestUtils() { + } + + /** + * Assert that a configuration option matches the expected value. + * @param conf configuration + * @param key option key + * @param expected expected value + */ + public static void assertOptionEquals(Configuration conf, + String key, + String expected) { + assertEquals("Value of " + key, expected, conf.get(key)); + } + + /** + * Assume that a condition is met. If not: log at WARN and + * then throw an {@link AssumptionViolatedException}. + * @param message message in an assumption + * @param condition condition to probe + */ + public static void assume(String message, boolean condition) { + if (!condition) { + LOG.warn(message); + } + Assume.assumeTrue(message, condition); + } + + /** + * Gets the current value of the given gauge. + * @param fs filesystem + * @param gaugeName gauge name + * @return the gauge value + */ + public static long getLongGaugeValue(NativeAzureFileSystem fs, + String gaugeName) { + return getLongGauge(gaugeName, getMetrics(fs.getInstrumentation())); + } + + /** + * Gets the current value of the given counter. + * @param fs filesystem + * @param counterName counter name + * @return the counter value + */ + public static long getLongCounterValue(NativeAzureFileSystem fs, + String counterName) { + return getLongCounter(counterName, getMetrics(fs.getInstrumentation())); + } + + + /** + * Delete a path, catching any exception and downgrading to a log message. + * @param fs filesystem + * @param path path to delete + * @param recursive recursive delete? + * @throws IOException IO failure. + */ + public static void deleteQuietly(FileSystem fs, + Path path, + boolean recursive) throws IOException { + if (fs != null && path != null) { + try { + fs.delete(path, recursive); + } catch (IOException e) { + LOG.warn("When deleting {}", path, e); + } + } + } + + + /** + * Clean up the test account if non-null; return null to put in the + * field. + * @param testAccount test account to clean up + * @return null + * @throws Execption cleanup problems + */ + public static AzureBlobStorageTestAccount cleanup( + AzureBlobStorageTestAccount testAccount) throws Exception { + if (testAccount != null) { + testAccount.cleanup(); + testAccount = null; + } + return null; + } + + + /** + * Clean up the test account; any thrown exceptions are caught and + * logged. + * @param testAccount test account + * @return null, so that any fields can be reset. 
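A sketch of the intended call pattern in a hypothetical subclass teardown: because both helpers return null, the account can be released and the field cleared in a single assignment.

    @Override
    public void tearDown() throws Exception {
      super.tearDown();
      // cleanup failures are logged rather than rethrown; the field ends up null
      testAccount = AzureTestUtils.cleanupTestAccount(testAccount);
    }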
+ */ + public static AzureBlobStorageTestAccount cleanupTestAccount( + AzureBlobStorageTestAccount testAccount) { + if (testAccount != null) { + try { + testAccount.cleanup(); + } catch (Exception e) { + LOG.error("While cleaning up test account: ", e); + } + } + return null; + } + + /** + * Assume that the scale tests are enabled by the relevant system property. + */ + public static void assumeScaleTestsEnabled(Configuration conf) { + boolean enabled = getTestPropertyBool( + conf, + KEY_SCALE_TESTS_ENABLED, + DEFAULT_SCALE_TESTS_ENABLED); + assume("Scale test disabled: to enable set property " + + KEY_SCALE_TESTS_ENABLED, + enabled); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/CleanupTestContainers.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/CleanupTestContainers.java new file mode 100644 index 00000000000..059a8c4aa7c --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/CleanupTestContainers.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure.integration; + +import java.util.EnumSet; + +import com.microsoft.azure.storage.CloudStorageAccount; +import com.microsoft.azure.storage.blob.CloudBlobClient; +import com.microsoft.azure.storage.blob.CloudBlobContainer; +import org.junit.Test; + +import org.apache.hadoop.fs.azure.AbstractWasbTestBase; +import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; + +/** + * This looks like a test, but it is really a command to invoke to + * clean up containers created in other test runs. 
+ * + */ +public class CleanupTestContainers extends AbstractWasbTestBase { + + private static final String CONTAINER_PREFIX = "wasbtests-"; + + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create( + "CleanupTestContainers", + EnumSet.noneOf(AzureBlobStorageTestAccount.CreateOptions.class), + createConfiguration(), + true); + } + + @Test + public void testEnumContainers() throws Throwable { + describe("Enumerating all the WASB test containers"); + + int count = 0; + CloudStorageAccount storageAccount = getTestAccount().getRealAccount(); + CloudBlobClient blobClient = storageAccount.createCloudBlobClient(); + Iterable containers + = blobClient.listContainers(CONTAINER_PREFIX); + for (CloudBlobContainer container : containers) { + count++; + LOG.info("Container {} URI {}", + container.getName(), + container.getUri()); + } + LOG.info("Found {} test containers", count); + } + + @Test + public void testDeleteContainers() throws Throwable { + describe("Delete all the WASB test containers"); + int count = 0; + CloudStorageAccount storageAccount = getTestAccount().getRealAccount(); + CloudBlobClient blobClient = storageAccount.createCloudBlobClient(); + Iterable containers + = blobClient.listContainers(CONTAINER_PREFIX); + for (CloudBlobContainer container : containers) { + LOG.info("Container {} URI {}", + container.getName(), + container.getUri()); + if (container.deleteIfExists()) { + count++; + } + } + LOG.info("Deleted {} test containers", count); + } + + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/ITestAzureHugeFiles.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/ITestAzureHugeFiles.java new file mode 100644 index 00000000000..850aca10024 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/ITestAzureHugeFiles.java @@ -0,0 +1,456 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azure.integration; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.EnumSet; +import java.util.Iterator; + +import org.junit.Assert; +import org.junit.Assume; +import org.junit.FixMethodOrder; +import org.junit.Test; +import org.junit.runners.MethodSorters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.StorageStatistics; +import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; +import org.apache.hadoop.fs.azure.NativeAzureFileSystem; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.io.IOUtils; + +import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.*; +import static org.apache.hadoop.fs.contract.ContractTestUtils.*; + + +/** + * Scale test which creates a huge file. + * + * Important: the order in which these tests execute is fixed to + * alphabetical order. Test cases are numbered {@code test_123_} to impose + * an ordering based on the numbers. + * + * Having this ordering allows the tests to assume that the huge file + * exists. Even so: they should all have a {@link #assumeHugeFileExists()} + * check at the start, in case an individual test is executed. + * + * Ignore checkstyle complaints about naming: we need a scheme with visible + * ordering. + */ + +@FixMethodOrder(MethodSorters.NAME_ASCENDING) +public class ITestAzureHugeFiles extends AbstractAzureScaleTest { + + private static final Logger LOG = LoggerFactory.getLogger( + ITestAzureHugeFiles.class); + + private Path scaleTestDir; + private Path hugefile; + private Path hugefileRenamed; + private AzureBlobStorageTestAccount testAccountForCleanup; + + private static final int UPLOAD_BLOCKSIZE = 64 * S_1K; + private static final byte[] SOURCE_DATA; + + static { + SOURCE_DATA = dataset(UPLOAD_BLOCKSIZE, 0, S_256); + } + + private Path testPath; + + @Override + public void setUp() throws Exception { + super.setUp(); + testPath = path("ITestAzureHugeFiles"); + scaleTestDir = new Path(testPath, "scale"); + hugefile = new Path(scaleTestDir, "hugefile"); + hugefileRenamed = new Path(scaleTestDir, "hugefileRenamed"); + } + + /** + * Only clean up the test account (and delete the container) if the account + * is set in the field {@code testAccountForCleanup}. + * @throws Exception + */ + @Override + public void tearDown() throws Exception { + testAccount = null; + super.tearDown(); + if (testAccountForCleanup != null) { + cleanupTestAccount(testAccount); + } + } + + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create( + "testazurehugefiles", + EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), + createConfiguration(), + true); + } + + /** + * Stop the test-case teardown from deleting the test path. + * @throws IOException never + */ + protected void deleteTestDirInTeardown() throws IOException { + // this is a no-op, so the test file is preserved. 
+ // the last test in the suite does the teardown + } + + protected void deleteHugeFile() throws IOException { + describe("Deleting %s", hugefile); + ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + getFileSystem().delete(hugefile, false); + timer.end("time to delete %s", hugefile); + } + + /** + * Log how long an IOP took, by dividing the total time by the + * count of operations, printing in a human-readable form. + * @param operation operation being measured + * @param timer timing data + * @param count IOP count. + */ + protected void logTimePerIOP(String operation, + ContractTestUtils.NanoTimer timer, + long count) { + LOG.info("Time per {}: {} nS", + operation, toHuman(timer.duration() / count)); + } + + /** + * Assume that the huge file exists, skip if not/empty. + * @return the file status + * @throws IOException IO failure + */ + FileStatus assumeHugeFileExists() throws IOException { + assertPathExists(getFileSystem(), "huge file not created", hugefile); + try { + FileStatus status = getFileSystem().getFileStatus(hugefile); + Assume.assumeTrue("Not a file: " + status, status.isFile()); + Assume.assumeTrue("File " + hugefile + " is empty", status.getLen() > 0); + return status; + } catch (FileNotFoundException e) { + skip("huge file not created: " + hugefile); + } + return null; + } + + /** + * If/when {@link NativeAzureFileSystem#getStorageStatistics()} returns + * statistics, this will be interesting. + */ + private void logFSState() { + StorageStatistics statistics = getFileSystem().getStorageStatistics(); + Iterator longStatistics + = statistics.getLongStatistics(); + while (longStatistics.hasNext()) { + StorageStatistics.LongStatistic next = longStatistics.next(); + LOG.info("{} = {}", next.getName(), next.getValue()); + } + } + + @Test + public void test_010_CreateHugeFile() throws IOException { + long filesize = getTestPropertyBytes(getConfiguration(), + KEY_HUGE_FILESIZE, + DEFAULT_HUGE_FILESIZE); + long filesizeMB = filesize / S_1M; + + // clean up from any previous attempts + deleteHugeFile(); + + describe("Creating file %s of size %d MB", hugefile, filesizeMB); + + // now do a check of available upload time, with a pessimistic bandwidth + // (that of remote upload tests). If the test times out then not only is + // the test outcome lost, as the follow-on tests continue, they will + // overlap with the ongoing upload test, for much confusion. +/* + int timeout = getTestTimeoutSeconds(); + // assume 1 MB/s upload bandwidth + int bandwidth = _1MB; + long uploadTime = filesize / bandwidth; + assertTrue(String.format("Timeout set in %s seconds is too low;" + + " estimating upload time of %d seconds at 1 MB/s." + + " Rerun tests with -D%s=%d", + timeout, uploadTime, KEY_TEST_TIMEOUT, uploadTime * 2), + uploadTime < timeout); +*/ + assertEquals("File size set in " + KEY_HUGE_FILESIZE + " = " + filesize + + " is not a multiple of " + UPLOAD_BLOCKSIZE, + 0, filesize % UPLOAD_BLOCKSIZE); + + byte[] data = SOURCE_DATA; + + long blocks = filesize / UPLOAD_BLOCKSIZE; + long blocksPerMB = S_1M / UPLOAD_BLOCKSIZE; + + // perform the upload. + // there's lots of logging here, so that a tail -f on the output log + // can give a view of what is happening. 
+ NativeAzureFileSystem fs = getFileSystem(); + + ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + long blocksPer10MB = blocksPerMB * 10; + fs.mkdirs(hugefile.getParent()); + try (FSDataOutputStream out = fs.create(hugefile, + true, + UPLOAD_BLOCKSIZE, + null)) { + for (long block = 1; block <= blocks; block++) { + out.write(data); + long written = block * UPLOAD_BLOCKSIZE; + // every 10 MB and on file upload @ 100%, print some stats + if (block % blocksPer10MB == 0 || written == filesize) { + long percentage = written * 100 / filesize; + double elapsedTime = timer.elapsedTime() / NANOSEC; + double writtenMB = 1.0 * written / S_1M; + LOG.info(String.format("[%02d%%] Buffered %.2f MB out of %d MB;" + + " elapsedTime=%.2fs; write to buffer bandwidth=%.2f MB/s", + percentage, + writtenMB, + filesizeMB, + elapsedTime, + writtenMB / elapsedTime)); + } + } + // now close the file + LOG.info("Closing stream {}", out); + ContractTestUtils.NanoTimer closeTimer + = new ContractTestUtils.NanoTimer(); + out.close(); + closeTimer.end("time to close() output stream"); + } + + timer.end("time to write %d MB in blocks of %d", + filesizeMB, UPLOAD_BLOCKSIZE); + logFSState(); + bandwidth(timer, filesize); + ContractTestUtils.assertPathExists(fs, "Huge file", hugefile); + FileStatus status = fs.getFileStatus(hugefile); + ContractTestUtils.assertIsFile(hugefile, status); + assertEquals("File size in " + status, filesize, status.getLen()); + } + + @Test + public void test_040_PositionedReadHugeFile() throws Throwable { + assumeHugeFileExists(); + describe("Positioned reads of file %s", hugefile); + NativeAzureFileSystem fs = getFileSystem(); + FileStatus status = fs.getFileStatus(hugefile); + long filesize = status.getLen(); + int ops = 0; + final int bufferSize = 8192; + byte[] buffer = new byte[bufferSize]; + long eof = filesize - 1; + + ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + ContractTestUtils.NanoTimer readAtByte0, readAtByte0Again, readAtEOF; + try (FSDataInputStream in = openDataFile()) { + readAtByte0 = new ContractTestUtils.NanoTimer(); + in.readFully(0, buffer); + readAtByte0.end("time to read data at start of file"); + ops++; + + readAtEOF = new ContractTestUtils.NanoTimer(); + in.readFully(eof - bufferSize, buffer); + readAtEOF.end("time to read data at end of file"); + ops++; + + readAtByte0Again = new ContractTestUtils.NanoTimer(); + in.readFully(0, buffer); + readAtByte0Again.end("time to read data at start of file again"); + ops++; + LOG.info("Final stream state: {}", in); + } + long mb = Math.max(filesize / S_1M, 1); + + logFSState(); + timer.end("time to performed positioned reads of %d MB ", mb); + LOG.info("Time per positioned read = {} nS", + toHuman(timer.nanosPerOperation(ops))); + } + + protected FSDataInputStream openDataFile() throws IOException { + NanoTimer openTimer = new NanoTimer(); + FSDataInputStream inputStream = getFileSystem().open(hugefile, + UPLOAD_BLOCKSIZE); + openTimer.end("open data file"); + return inputStream; + } + + + /** + * Work out the bandwidth in bytes/second. 
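+ * For example (illustrative figures only): 256 MB moved in 4 seconds of
+ * timer duration is 256 * S_1M * NANOSEC / 4e9 bytes/second, i.e. 64 MB/s.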
+ * @param timer timer measuring the duration + * @param bytes bytes + * @return the number of bytes/second of the recorded operation + */ + public static double bandwidthInBytes(NanoTimer timer, long bytes) { + return bytes * NANOSEC / timer.duration(); + } + + @Test + public void test_050_readHugeFile() throws Throwable { + assumeHugeFileExists(); + describe("Reading %s", hugefile); + NativeAzureFileSystem fs = getFileSystem(); + FileStatus status = fs.getFileStatus(hugefile); + long filesize = status.getLen(); + long blocks = filesize / UPLOAD_BLOCKSIZE; + byte[] data = new byte[UPLOAD_BLOCKSIZE]; + + ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + try (FSDataInputStream in = openDataFile()) { + for (long block = 0; block < blocks; block++) { + in.readFully(data); + } + LOG.info("Final stream state: {}", in); + } + + long mb = Math.max(filesize / S_1M, 1); + timer.end("time to read file of %d MB ", mb); + LOG.info("Time per MB to read = {} nS", + toHuman(timer.nanosPerOperation(mb))); + bandwidth(timer, filesize); + logFSState(); + } + + @Test + public void test_060_openAndReadWholeFileBlocks() throws Throwable { + FileStatus status = assumeHugeFileExists(); + int blockSize = S_1M; + describe("Open the test file and read it in blocks of size %d", + blockSize); + long len = status.getLen(); + FSDataInputStream in = openDataFile(); + NanoTimer timer2 = null; + long blockCount = 0; + long totalToRead = 0; + int resetCount = 0; + try { + byte[] block = new byte[blockSize]; + timer2 = new NanoTimer(); + long count = 0; + // implicitly rounding down here + blockCount = len / blockSize; + totalToRead = blockCount * blockSize; + long minimumBandwidth = S_128K; + int maxResetCount = 4; + resetCount = 0; + for (long i = 0; i < blockCount; i++) { + int offset = 0; + int remaining = blockSize; + long blockId = i + 1; + NanoTimer blockTimer = new NanoTimer(); + int reads = 0; + while (remaining > 0) { + NanoTimer readTimer = new NanoTimer(); + int bytesRead = in.read(block, offset, remaining); + reads++; + if (bytesRead == 1) { + break; + } + remaining -= bytesRead; + offset += bytesRead; + count += bytesRead; + readTimer.end(); + if (bytesRead != 0) { + LOG.debug("Bytes in read #{}: {} , block bytes: {}," + + " remaining in block: {}" + + " duration={} nS; ns/byte: {}, bandwidth={} MB/s", + reads, bytesRead, blockSize - remaining, remaining, + readTimer.duration(), + readTimer.nanosPerOperation(bytesRead), + readTimer.bandwidthDescription(bytesRead)); + } else { + LOG.warn("0 bytes returned by read() operation #{}", reads); + } + } + blockTimer.end("Reading block %d in %d reads", blockId, reads); + String bw = blockTimer.bandwidthDescription(blockSize); + LOG.info("Bandwidth of block {}: {} MB/s: ", blockId, bw); + if (bandwidthInBytes(blockTimer, blockSize) < minimumBandwidth) { + LOG.warn("Bandwidth {} too low on block {}: resetting connection", + bw, blockId); + Assert.assertTrue("Bandwidth of " + bw + " too low after " + + resetCount + " attempts", resetCount <= maxResetCount); + resetCount++; + // reset the connection + } + } + } finally { + IOUtils.closeStream(in); + } + timer2.end("Time to read %d bytes in %d blocks", totalToRead, blockCount); + LOG.info("Overall Bandwidth {} MB/s; reset connections {}", + timer2.bandwidth(totalToRead), resetCount); + } + + @Test + public void test_100_renameHugeFile() throws Throwable { + assumeHugeFileExists(); + describe("renaming %s to %s", hugefile, hugefileRenamed); + NativeAzureFileSystem fs = getFileSystem(); + FileStatus 
status = fs.getFileStatus(hugefile); + long filesize = status.getLen(); + fs.delete(hugefileRenamed, false); + ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); + fs.rename(hugefile, hugefileRenamed); + long mb = Math.max(filesize / S_1M, 1); + timer.end("time to rename file of %d MB", mb); + LOG.info("Time per MB to rename = {} nS", + toHuman(timer.nanosPerOperation(mb))); + bandwidth(timer, filesize); + logFSState(); + FileStatus destFileStatus = fs.getFileStatus(hugefileRenamed); + assertEquals(filesize, destFileStatus.getLen()); + + // rename back + ContractTestUtils.NanoTimer timer2 = new ContractTestUtils.NanoTimer(); + fs.rename(hugefileRenamed, hugefile); + timer2.end("Renaming back"); + LOG.info("Time per MB to rename = {} nS", + toHuman(timer2.nanosPerOperation(mb))); + bandwidth(timer2, filesize); + } + + @Test + public void test_999_deleteHugeFiles() throws IOException { + // mark the test account for cleanup after this test + testAccountForCleanup = testAccount; + deleteHugeFile(); + ContractTestUtils.NanoTimer timer2 = new ContractTestUtils.NanoTimer(); + NativeAzureFileSystem fs = getFileSystem(); + fs.delete(hugefileRenamed, false); + timer2.end("time to delete %s", hugefileRenamed); + rm(fs, testPath, true, false); + assertPathDoesNotExist(fs, "deleted huge file", testPath); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/Sizes.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/Sizes.java new file mode 100644 index 00000000000..92b10cfeddd --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/Sizes.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure.integration; + +/** + * Sizes of data. + * Checkstyle doesn't like the naming scheme or the fact its an interface. 
+ */ +public interface Sizes { + + int S_256 = 256; + int S_512 = 512; + int S_1K = 1024; + int S_4K = 4 * S_1K; + int S_8K = 8 * S_1K; + int S_16K = 16 * S_1K; + int S_32K = 32 * S_1K; + int S_64K = 64 * S_1K; + int S_128K = 128 * S_1K; + int S_256K = 256 * S_1K; + int S_1M = S_1K * S_1K; + int S_2M = 2 * S_1M; + int S_5M = 5 * S_1M; + int S_10M = 10* S_1M; + double NANOSEC = 1.0e9; + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestAzureFileSystemInstrumentation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/ITestAzureFileSystemInstrumentation.java similarity index 88% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestAzureFileSystemInstrumentation.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/ITestAzureFileSystemInstrumentation.java index 818a844dfdb..60e24eee289 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestAzureFileSystemInstrumentation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/ITestAzureFileSystemInstrumentation.java @@ -33,7 +33,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assume.assumeNotNull; import static org.mockito.Matchers.argThat; import static org.mockito.Mockito.verify; @@ -44,6 +43,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azure.AbstractWasbTestBase; import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; import org.apache.hadoop.fs.azure.AzureException; import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore; @@ -53,39 +53,31 @@ import org.apache.hadoop.metrics2.MetricsTag; import org.hamcrest.BaseMatcher; import org.hamcrest.Description; -import org.junit.After; -import org.junit.Before; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -public class TestAzureFileSystemInstrumentation { - private FileSystem fs; - private AzureBlobStorageTestAccount testAccount; +/** + * Instrumentation test, changing state of time and verifying metrics are + * consistent. 
+ */ +public class ITestAzureFileSystemInstrumentation extends AbstractWasbTestBase { - @Before - public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.create(); - if (testAccount != null) { - fs = testAccount.getFileSystem(); - } - assumeNotNull(testAccount); - } + protected static final Logger LOG = + LoggerFactory.getLogger(ITestAzureFileSystemInstrumentation.class); - @After - public void tearDown() throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - fs = null; - } + @Override + protected AzureBlobStorageTestAccount createTestAccount() throws Exception { + return AzureBlobStorageTestAccount.create(); } @Test public void testMetricTags() throws Exception { String accountName = - testAccount.getRealAccount().getBlobEndpoint() + getTestAccount().getRealAccount().getBlobEndpoint() .getAuthority(); String containerName = - testAccount.getRealContainer().getName(); + getTestAccount().getRealContainer().getName(); MetricsRecordBuilder myMetrics = getMyMetrics(); verify(myMetrics).add(argThat( new TagMatcher("accountName", accountName) @@ -119,14 +111,14 @@ public void testMetricsOnMkdirList() throws Exception { AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_DIRECTORIES_CREATED)); // List the root contents - assertEquals(1, fs.listStatus(new Path("/")).length); + assertEquals(1, getFileSystem().listStatus(new Path("/")).length); base = assertWebResponsesEquals(base, 1); assertNoErrors(); } private BandwidthGaugeUpdater getBandwidthGaugeUpdater() { - NativeAzureFileSystem azureFs = (NativeAzureFileSystem)fs; + NativeAzureFileSystem azureFs = (NativeAzureFileSystem) getFileSystem(); AzureNativeFileSystemStore azureStore = azureFs.getStore(); return azureStore.getBandwidthGaugeUpdater(); } @@ -152,7 +144,7 @@ public void testMetricsOnFileCreateRead() throws Exception { // Create a file Date start = new Date(); - OutputStream outputStream = fs.create(filePath); + OutputStream outputStream = getFileSystem().create(filePath); outputStream.write(nonZeroByteArray(FILE_SIZE)); outputStream.close(); long uploadDurationMs = new Date().getTime() - start.getTime(); @@ -177,7 +169,7 @@ public void testMetricsOnFileCreateRead() throws Exception { " bytes plus a little overhead.", totalBytesWritten >= FILE_SIZE && totalBytesWritten < (FILE_SIZE * 2)); long uploadRate = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_UPLOAD_RATE); - System.out.println("Upload rate: " + uploadRate + " bytes/second."); + LOG.info("Upload rate: " + uploadRate + " bytes/second."); long expectedRate = (FILE_SIZE * 1000L) / uploadDurationMs; assertTrue("The upload rate " + uploadRate + " is below the expected range of around " + expectedRate + @@ -187,7 +179,7 @@ public void testMetricsOnFileCreateRead() throws Exception { uploadRate >= expectedRate); long uploadLatency = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_UPLOAD_LATENCY); - System.out.println("Upload latency: " + uploadLatency); + LOG.info("Upload latency: {}", uploadLatency); long expectedLatency = uploadDurationMs; // We're uploading less than a block. 
assertTrue("The upload latency " + uploadLatency + " should be greater than zero now that I've just uploaded a file.", @@ -201,7 +193,7 @@ public void testMetricsOnFileCreateRead() throws Exception { // Read the file start = new Date(); - InputStream inputStream = fs.open(filePath); + InputStream inputStream = getFileSystem().open(filePath); int count = 0; while (inputStream.read() >= 0) { count++; @@ -224,7 +216,7 @@ public void testMetricsOnFileCreateRead() throws Exception { " bytes plus a little overhead.", bytesRead > (FILE_SIZE / 2) && bytesRead < (FILE_SIZE * 2)); long downloadRate = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_DOWNLOAD_RATE); - System.out.println("Download rate: " + downloadRate + " bytes/second."); + LOG.info("Download rate: " + downloadRate + " bytes/second."); expectedRate = (FILE_SIZE * 1000L) / downloadDurationMs; assertTrue("The download rate " + downloadRate + " is below the expected range of around " + expectedRate + @@ -234,7 +226,7 @@ public void testMetricsOnFileCreateRead() throws Exception { downloadRate >= expectedRate); long downloadLatency = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_DOWNLOAD_LATENCY); - System.out.println("Download latency: " + downloadLatency); + LOG.info("Download latency: " + downloadLatency); expectedLatency = downloadDurationMs; // We're downloading less than a block. assertTrue("The download latency " + downloadLatency + " should be greater than zero now that I've just downloaded a file.", @@ -263,7 +255,7 @@ public void testMetricsOnBigFileCreateRead() throws Exception { getBandwidthGaugeUpdater().suppressAutoUpdate(); // Create a file - OutputStream outputStream = fs.create(filePath); + OutputStream outputStream = getFileSystem().create(filePath); outputStream.write(new byte[FILE_SIZE]); outputStream.close(); @@ -282,16 +274,16 @@ public void testMetricsOnBigFileCreateRead() throws Exception { " bytes plus a little overhead.", totalBytesWritten >= FILE_SIZE && totalBytesWritten < (FILE_SIZE * 2)); long uploadRate = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_UPLOAD_RATE); - System.out.println("Upload rate: " + uploadRate + " bytes/second."); + LOG.info("Upload rate: " + uploadRate + " bytes/second."); long uploadLatency = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_UPLOAD_LATENCY); - System.out.println("Upload latency: " + uploadLatency); + LOG.info("Upload latency: " + uploadLatency); assertTrue("The upload latency " + uploadLatency + " should be greater than zero now that I've just uploaded a file.", uploadLatency > 0); // Read the file - InputStream inputStream = fs.open(filePath); + InputStream inputStream = getFileSystem().open(filePath); int count = 0; while (inputStream.read() >= 0) { count++; @@ -308,10 +300,10 @@ public void testMetricsOnBigFileCreateRead() throws Exception { long totalBytesRead = AzureMetricsTestUtil.getCurrentTotalBytesRead(getInstrumentation()); assertEquals(FILE_SIZE, totalBytesRead); long downloadRate = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_DOWNLOAD_RATE); - System.out.println("Download rate: " + downloadRate + " bytes/second."); + LOG.info("Download rate: " + downloadRate + " bytes/second."); long downloadLatency = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_DOWNLOAD_LATENCY); - System.out.println("Download latency: " + downloadLatency); + LOG.info("Download latency: " + downloadLatency); assertTrue("The download latency " + downloadLatency + " should be 
greater than zero now that I've just downloaded a file.", downloadLatency > 0); @@ -326,13 +318,14 @@ public void testMetricsOnFileRename() throws Exception { // Create an empty file assertEquals(0, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_FILES_CREATED)); - assertTrue(fs.createNewFile(originalPath)); + assertTrue(getFileSystem().createNewFile(originalPath)); logOpResponseCount("Creating an empty file", base); base = assertWebResponsesInRange(base, 2, 20); assertEquals(1, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_FILES_CREATED)); // Rename the file - assertTrue(fs.rename(originalPath, destinationPath)); + assertTrue( + ((FileSystem) getFileSystem()).rename(originalPath, destinationPath)); // Varies: at the time of writing this code it takes 7 requests/responses. logOpResponseCount("Renaming a file", base); base = assertWebResponsesInRange(base, 2, 15); @@ -347,7 +340,7 @@ public void testMetricsOnFileExistsDelete() throws Exception { Path filePath = new Path("/metricsTest_delete"); // Check existence - assertFalse(fs.exists(filePath)); + assertFalse(getFileSystem().exists(filePath)); // At the time of writing this code it takes 2 requests/responses to // check existence, which seems excessive, plus initial request for // container check. @@ -355,17 +348,17 @@ public void testMetricsOnFileExistsDelete() throws Exception { base = assertWebResponsesInRange(base, 1, 3); // Create an empty file - assertTrue(fs.createNewFile(filePath)); + assertTrue(getFileSystem().createNewFile(filePath)); base = getCurrentWebResponses(); // Check existence again - assertTrue(fs.exists(filePath)); + assertTrue(getFileSystem().exists(filePath)); logOpResponseCount("Checking file existence for existent file", base); base = assertWebResponsesInRange(base, 1, 2); // Delete the file assertEquals(0, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_FILES_DELETED)); - assertTrue(fs.delete(filePath, false)); + assertTrue(getFileSystem().delete(filePath, false)); // At the time of writing this code it takes 4 requests/responses to // delete, which seems excessive. Check for range 1-4 for now. logOpResponseCount("Deleting a file", base); @@ -384,15 +377,16 @@ public void testMetricsOnDirRename() throws Exception { Path destDirName = new Path("/metricsTestDirectory_RenameFinal"); // Create an empty directory - assertTrue(fs.mkdirs(originalDirName)); + assertTrue(getFileSystem().mkdirs(originalDirName)); base = getCurrentWebResponses(); // Create an inner file - assertTrue(fs.createNewFile(innerFileName)); + assertTrue(getFileSystem().createNewFile(innerFileName)); base = getCurrentWebResponses(); // Rename the directory - assertTrue(fs.rename(originalDirName, destDirName)); + assertTrue(getFileSystem().rename(originalDirName, destDirName)); + // At the time of writing this code it takes 11 requests/responses // to rename the directory with one file. Check for range 1-20 for now. logOpResponseCount("Renaming a directory", base); @@ -401,6 +395,19 @@ public void testMetricsOnDirRename() throws Exception { assertNoErrors(); } + /** + * Recursive discovery of path depth + * @param path path to measure. + * @return depth, where "/" == 0. 
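+ * For example (illustrative): the depth of "/" is 0, of "/a" is 1 and of
+ * "/a/b/c" is 3.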
+ */ + int depth(Path path) { + if (path.isRoot()) { + return 0; + } else { + return 1 + depth(path.getParent()); + } + } + @Test public void testClientErrorMetrics() throws Exception { String fileName = "metricsTestFile_ClientError"; @@ -410,8 +417,8 @@ public void testClientErrorMetrics() throws Exception { String leaseID = null; try { // Create a file - outputStream = fs.create(filePath); - leaseID = testAccount.acquireShortLease(fileName); + outputStream = getFileSystem().create(filePath); + leaseID = getTestAccount().acquireShortLease(fileName); try { outputStream.write(new byte[FILE_SIZE]); outputStream.close(); @@ -424,15 +431,15 @@ public void testClientErrorMetrics() throws Exception { assertEquals(0, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_SERVER_ERRORS)); } finally { if(leaseID != null){ - testAccount.releaseLease(leaseID, fileName); + getTestAccount().releaseLease(leaseID, fileName); } IOUtils.closeStream(outputStream); } } private void logOpResponseCount(String opName, long base) { - System.out.println(opName + " took " + (getCurrentWebResponses() - base) + - " web responses to complete."); + LOG.info("{} took {} web responses to complete.", + opName, getCurrentWebResponses() - base); } /** @@ -448,7 +455,7 @@ private long getBaseWebResponses() { * Gets the current value of the wasb_web_responses counter. */ private long getCurrentWebResponses() { - return AzureMetricsTestUtil.getCurrentWebResponses(getInstrumentation()); + return AzureMetricsTestUtil.getCurrentWebResponses(getInstrumentation()); } /** @@ -496,7 +503,7 @@ private MetricsRecordBuilder getMyMetrics() { } private AzureFileSystemInstrumentation getInstrumentation() { - return ((NativeAzureFileSystem)fs).getInstrumentation(); + return getFileSystem().getInstrumentation(); } /** diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java index f626de4ee46..9ccddd122e2 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java @@ -25,7 +25,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.namenode.INodeFile; import org.apache.hadoop.hdfs.tools.ECAdmin; @@ -60,12 +59,11 @@ public class TestDistCpUtils { @BeforeClass public static void create() throws IOException { - config.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, - "XOR-2-1-1024k"); cluster = new MiniDFSCluster.Builder(config) .numDataNodes(2) .format(true) - .build(); + .build(); + cluster.getFileSystem().enableErasureCodingPolicy("XOR-2-1-1024k"); } @AfterClass diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 6825a36ebdd..2aa9a5c0bb4 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -153,6 +153,10 @@ + + + + @@ -599,4 +603,22 @@ + + + + + + + + + + + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/ams/ApplicationMasterServiceProcessor.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/ams/ApplicationMasterServiceProcessor.java index b7d925a6592..8e76a11dc27 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/ams/ApplicationMasterServiceProcessor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/ams/ApplicationMasterServiceProcessor.java @@ -52,11 +52,13 @@ void init(ApplicationMasterServiceContext amsContext, * @param request Register Request. * @param response Register Response. * @throws IOException IOException. + * @throws YarnException in critical situation where invalid + * profiles/resources are added. */ - void registerApplicationMaster( - ApplicationAttemptId applicationAttemptId, + void registerApplicationMaster(ApplicationAttemptId applicationAttemptId, RegisterApplicationMasterRequest request, - RegisterApplicationMasterResponse response) throws IOException; + RegisterApplicationMasterResponse response) + throws IOException, YarnException; /** * Allocate call. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java index 394454f20bc..3c4e4d01002 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationClientProtocol.java @@ -65,6 +65,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; @@ -75,6 +81,7 @@ import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException; /** *
<p>
The protocol between clients and the ResourceManager @@ -578,7 +585,7 @@ SignalContainerResponse signalToContainer( * Note: If application timeout value is less than or equal to current * time then update application throws YarnException. * @param request to set ApplicationTimeouts of an application - * @return an empty response that the update has completed successfully. + * @return a response with updated timeouts. * @throws YarnException if update request has empty values or application is * in completing states. * @throws IOException on IO failures @@ -589,4 +596,50 @@ SignalContainerResponse signalToContainer( public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( UpdateApplicationTimeoutsRequest request) throws YarnException, IOException; + + /** + *
<p>
+ * The interface used by clients to get all the resource profiles that are + * available on the ResourceManager. + *
</p>
+ * @param request request to get all the resource profiles + * @return Response containing a map of the profile name to Resource + * capabilities + * @throws YARNFeatureNotEnabledException if resource-profile is disabled + * @throws YarnException if any error happens inside YARN + * @throws IOException in case of other errors + */ + @Public + @Unstable + GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException; + + /** + *
<p>
+ * The interface to get the details for a specific resource profile. + *
</p>
+ * @param request request to get the details of a resource profile + * @return Response containing the details for a particular resource profile + * @throws YARNFeatureNotEnabledException if resource-profile is disabled + * @throws YarnException if any error happens inside YARN + * @throws IOException in case of other errors + */ + @Public + @Unstable + GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException; + + /** + *
<p>
+ * The interface to get the resource types (and their properties) supported by the ResourceManager. + *
</p>
+ * @param request request to get the details of a resource profile + * @return Response containing the details for a particular resource profile + * @throws YarnException if any error happens inside YARN + * @throws IOException in case of other errors + */ + @Public + @Unstable + GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3Exception.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceProfilesRequest.java similarity index 72% rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3Exception.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceProfilesRequest.java index 9258fd7d84d..0bb9bf805d9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3Exception.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceProfilesRequest.java @@ -16,24 +16,20 @@ * limitations under the License. */ -package org.apache.hadoop.fs.s3native; - -import java.io.IOException; +package org.apache.hadoop.yarn.api.protocolrecords; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.util.Records; /** - * Thrown if there is a problem communicating with Amazon S3. + * Request class for getting all the resource profiles from the RM. */ @InterfaceAudience.Public -@InterfaceStability.Stable -public class S3Exception extends IOException { +@InterfaceStability.Unstable +public abstract class GetAllResourceProfilesRequest { - private static final long serialVersionUID = 1L; - - public S3Exception(Throwable t) { - super(t); + public static GetAllResourceProfilesRequest newInstance() { + return Records.newRecord(GetAllResourceProfilesRequest.class); } - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceProfilesResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceProfilesResponse.java new file mode 100644 index 00000000000..547770890d5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceProfilesResponse.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.util.Records; + +import java.util.Map; + +/** + * Response class for getting all the resource profiles from the RM. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class GetAllResourceProfilesResponse { + + public static GetAllResourceProfilesResponse newInstance() { + return Records.newRecord(GetAllResourceProfilesResponse.class); + } + + public abstract void setResourceProfiles(Map profiles); + + public abstract Map getResourceProfiles(); + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || !(other instanceof GetAllResourceProfilesResponse)) { + return false; + } + return ((GetAllResourceProfilesResponse) other).getResourceProfiles() + .equals(this.getResourceProfiles()); + } + + @Override + public int hashCode() { + return this.getResourceProfiles().hashCode(); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoRequest.java new file mode 100644 index 00000000000..3bda4f54ec5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoRequest.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.util.Records; + +/** + * Request class for getting all the resource profiles from the RM. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class GetAllResourceTypeInfoRequest { + + public static GetAllResourceTypeInfoRequest newInstance() { + return Records.newRecord(GetAllResourceTypeInfoRequest.class); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoResponse.java new file mode 100644 index 00000000000..b57b96df3fd --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetAllResourceTypeInfoResponse.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.util.Records; + +import java.util.List; + +/** + * Response class for getting all the resource profiles from the RM. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class GetAllResourceTypeInfoResponse { + + public static GetAllResourceTypeInfoResponse newInstance() { + return Records.newRecord(GetAllResourceTypeInfoResponse.class); + } + + public abstract void setResourceTypeInfo(List resourceTypes); + + public abstract List getResourceTypeInfo(); + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || !(other instanceof GetAllResourceTypeInfoResponse)) { + return false; + } + return ((GetAllResourceTypeInfoResponse) other).getResourceTypeInfo() + .equals(this.getResourceTypeInfo()); + } + + @Override + public int hashCode() { + return this.getResourceTypeInfo().hashCode(); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetResourceProfileRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetResourceProfileRequest.java new file mode 100644 index 00000000000..3655be946da --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetResourceProfileRequest.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.util.Records; + +/** + * Request class for getting the details for a particular resource profile. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class GetResourceProfileRequest { + + public static GetResourceProfileRequest newInstance(String profile) { + GetResourceProfileRequest request = + Records.newRecord(GetResourceProfileRequest.class); + request.setProfileName(profile); + return request; + } + + public abstract void setProfileName(String profileName); + + public abstract String getProfileName(); + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || !(other instanceof GetResourceProfileRequest)) { + return false; + } + return this.getProfileName() + .equals(((GetResourceProfileRequest) other).getProfileName()); + } + + @Override + public int hashCode() { + return getProfileName().hashCode(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetResourceProfileResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetResourceProfileResponse.java new file mode 100644 index 00000000000..a010644a792 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetResourceProfileResponse.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.util.Records; + +/** + * Response class for getting the details for a particular resource profile. 
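+ * A typical lookup through the client protocol (an illustrative sketch;
+ * {@code client} is assumed to be an ApplicationClientProtocol proxy and
+ * error handling is omitted):
+ * <pre>
+ *   Resource small = client.getResourceProfile(
+ *       GetResourceProfileRequest.newInstance("small")).getResource();
+ * </pre>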
+ */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class GetResourceProfileResponse { + + public static GetResourceProfileResponse newInstance() { + return Records.newRecord(GetResourceProfileResponse.class); + } + + /** + * Get the resources that will be allocated if the profile was used. + * + * @return the resources that will be allocated if the profile was used. + */ + public abstract Resource getResource(); + + /** + * Set the resources that will be allocated if the profile is used. + * + * @param r Set the resources that will be allocated if the profile is used. + */ + public abstract void setResource(Resource r); + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other == null || !(other instanceof GetResourceProfileResponse)) { + return false; + } + return this.getResource() + .equals(((GetResourceProfileResponse) other).getResource()); + } + + @Override + public int hashCode() { + return getResource().hashCode(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java index 0b886dd5c90..8fa8563e2d3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java @@ -204,4 +204,12 @@ public abstract void setContainersFromPreviousAttempts( @Unstable public abstract void setSchedulerResourceTypes( EnumSet types); + + @Public + @Unstable + public abstract Map getResourceProfiles(); + + @Private + @Unstable + public abstract void setResourceProfiles(Map profiles); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestJets3tNativeS3FileSystemContract.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ResourceTypes.java similarity index 69% rename from hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestJets3tNativeS3FileSystemContract.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ResourceTypes.java index e51eaf65014..dbd9c37ceec 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestJets3tNativeS3FileSystemContract.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ResourceTypes.java @@ -16,18 +16,12 @@ * limitations under the License. */ -package org.apache.hadoop.fs.s3native; - -import java.io.IOException; +package org.apache.hadoop.yarn.api.protocolrecords; /** - * S3N basic contract tests through live S3 service. + * Enum which represents the resource type. Currently, the only type allowed is + * COUNTABLE. 
*/ -public class ITestJets3tNativeS3FileSystemContract - extends NativeS3FileSystemContractBaseTest { - - @Override - NativeFileSystemStore getNativeFileSystemStore() throws IOException { - return new Jets3tNativeFileSystemStore(); - } +public enum ResourceTypes { + COUNTABLE } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/UpdateApplicationTimeoutsResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/UpdateApplicationTimeoutsResponse.java index bd02bb85e84..3770eb4216e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/UpdateApplicationTimeoutsResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/UpdateApplicationTimeoutsResponse.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType; import org.apache.hadoop.yarn.util.Records; /** @@ -43,4 +44,22 @@ public static UpdateApplicationTimeoutsResponse newInstance() { Records.newRecord(UpdateApplicationTimeoutsResponse.class); return response; } + + /** + * Get ApplicationTimeouts of the application. Timeout value is + * in ISO8601 standard with format yyyy-MM-dd'T'HH:mm:ss.SSSZ. + * @return all ApplicationTimeouts of the application. + */ + public abstract Map getApplicationTimeouts(); + + /** + * Set the ApplicationTimeouts for the application. Timeout value + * is absolute. Timeout value should meet ISO8601 format. Support ISO8601 + * format is yyyy-MM-dd'T'HH:mm:ss.SSSZ. All pre-existing Map entries + * are cleared before adding the new Map. + * @param applicationTimeouts ApplicationTimeoutss for the + * application + */ + public abstract void setApplicationTimeouts( + Map applicationTimeouts); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java index 3cf8f3defa3..d2e33ff9bca 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java @@ -24,6 +24,8 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.util.Records; +import java.util.Map; + /** * Contains various scheduling metrics to be reported by UI and CLI. 
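The setApplicationTimeouts javadoc above expects absolute timestamps in the ISO8601 form yyyy-MM-dd'T'HH:mm:ss.SSSZ; a minimal sketch of producing such a value (class name and GMT zone are illustrative choices, not part of the patch):

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public final class Iso8601TimeoutSketch {
  public static void main(String[] args) {
    // absolute expiry ten minutes from now, in the format the javadoc requires
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
    fmt.setTimeZone(TimeZone.getTimeZone("GMT"));
    System.out.println(fmt.format(new Date(System.currentTimeMillis() + 600_000L)));
  }
}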
*/ @@ -35,9 +37,9 @@ public abstract class ApplicationResourceUsageReport { @Unstable public static ApplicationResourceUsageReport newInstance( int numUsedContainers, int numReservedContainers, Resource usedResources, - Resource reservedResources, Resource neededResources, long memorySeconds, - long vcoreSeconds, float queueUsagePerc, float clusterUsagePerc, - long preemptedMemorySeconds, long preemptedVcoresSeconds) { + Resource reservedResources, Resource neededResources, + Map resourceSecondsMap, float queueUsagePerc, + float clusterUsagePerc, Map preemtedResourceSecondsMap) { ApplicationResourceUsageReport report = Records.newRecord(ApplicationResourceUsageReport.class); report.setNumUsedContainers(numUsedContainers); @@ -45,12 +47,10 @@ public static ApplicationResourceUsageReport newInstance( report.setUsedResources(usedResources); report.setReservedResources(reservedResources); report.setNeededResources(neededResources); - report.setMemorySeconds(memorySeconds); - report.setVcoreSeconds(vcoreSeconds); + report.setResourceSecondsMap(resourceSecondsMap); report.setQueueUsagePercentage(queueUsagePerc); report.setClusterUsagePercentage(clusterUsagePerc); - report.setPreemptedMemorySeconds(preemptedMemorySeconds); - report.setPreemptedVcoreSeconds(preemptedVcoresSeconds); + report.setPreemptedResourceSecondsMap(preemtedResourceSecondsMap); return report; } @@ -229,4 +229,47 @@ public static ApplicationResourceUsageReport newInstance( @Public @Unstable public abstract long getPreemptedVcoreSeconds(); + + /** + * Get the aggregated number of resources that the application has + * allocated times the number of seconds the application has been running. + * @return map containing the resource name and aggregated resource-seconds + */ + @Public + @Unstable + public abstract Map getResourceSecondsMap(); + + /** + * Set the aggregated number of resources that the application has + * allocated times the number of seconds the application has been running. + * @param resourceSecondsMap map containing the resource name and aggregated + * resource-seconds + */ + @Private + @Unstable + public abstract void setResourceSecondsMap( + Map resourceSecondsMap); + + + /** + * Get the aggregated number of resources preempted that the application has + * allocated times the number of seconds the application has been running. + * @return map containing the resource name and aggregated preempted + * resource-seconds + */ + @Public + @Unstable + public abstract Map getPreemptedResourceSecondsMap(); + + /** + * Set the aggregated number of resources preempted that the application has + * allocated times the number of seconds the application has been running. 
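+ * For the built-in resources the map typically (illustrative, assuming the
+ * standard resource names) maps {@code ResourceInformation.MEMORY_MB.getName()}
+ * ("memory-mb") and {@code ResourceInformation.VCORES.getName()} ("vcores") to
+ * the values previously carried by the preemptedMemorySeconds and
+ * preemptedVcoresSeconds parameters.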
+ * @param preemptedResourceSecondsMap map containing the resource name and + * aggregated preempted resource-seconds + */ + @Private + @Unstable + public abstract void setPreemptedResourceSecondsMap( + Map preemptedResourceSecondsMap); + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/CollectorInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/CollectorInfo.java index d22b9fb48db..30450d655f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/CollectorInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/CollectorInfo.java @@ -18,16 +18,16 @@ package org.apache.hadoop.yarn.api.records; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.yarn.util.Records; /** * Collector info containing collector address and collector token passed from * RM to AM in Allocate Response. */ -@Private -@InterfaceStability.Unstable +@Public +@Evolving public abstract class CollectorInfo { protected static final long DEFAULT_TIMESTAMP_VALUE = -1; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java index 696fe062a5d..45e5bd4df62 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerState.java @@ -33,11 +33,14 @@ public enum ContainerState { /** Running container */ RUNNING, - + /** Completed container */ COMPLETE, /** Scheduled (awaiting resources) at the NM. */ @InterfaceStability.Unstable - SCHEDULED + SCHEDULED, + + /** Paused at the NM. */ + PAUSED } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ProfileCapability.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ProfileCapability.java new file mode 100644 index 00000000000..2cb46704716 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ProfileCapability.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.records; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.util.Records; + +import java.util.Map; + +/** + * Class to capture capability requirements when using resource profiles. The + * ProfileCapability is meant to be used as part of the ResourceRequest. A + * profile capability has two pieces - the resource profile name and the + * overrides. The resource profile specifies the name of the resource profile + * to be used and the capability override is the overrides desired on specific + * resource types. + * + * For example, if you have a resource profile "small" that maps to + * {@literal <4096M, 2 cores, 1 gpu>} and you set the capability override to + * {@literal <8192M, 0 cores, 0 gpu>}, then the actual resource allocation on + * the ResourceManager will be {@literal <8192M, 2 cores, 1 gpu>}. + * + * Note that the conversion from the ProfileCapability to the Resource class + * with the actual resource requirements will be done by the ResourceManager, + * which has the actual profile to Resource mapping. + * + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class ProfileCapability { + + public static final String DEFAULT_PROFILE = "default"; + + public static ProfileCapability newInstance(Resource override) { + return newInstance(DEFAULT_PROFILE, override); + } + + public static ProfileCapability newInstance(String profile) { + Preconditions + .checkArgument(profile != null, "The profile name cannot be null"); + ProfileCapability obj = Records.newRecord(ProfileCapability.class); + obj.setProfileName(profile); + obj.setProfileCapabilityOverride(Resource.newInstance(0, 0)); + return obj; + } + + public static ProfileCapability newInstance(String profile, + Resource override) { + Preconditions + .checkArgument(profile != null, "The profile name cannot be null"); + ProfileCapability obj = Records.newRecord(ProfileCapability.class); + obj.setProfileName(profile); + obj.setProfileCapabilityOverride(override); + return obj; + } + + /** + * Get the profile name. + * @return the profile name + */ + public abstract String getProfileName(); + + /** + * Get the profile capability override. + * @return Resource object containing the override. + */ + public abstract Resource getProfileCapabilityOverride(); + + /** + * Set the resource profile name. + * @param profileName the resource profile name + */ + public abstract void setProfileName(String profileName); + + /** + * Set the capability override to override specific resource types on the + * resource profile. + * + * For example, if you have a resource profile "small" that maps to + * {@literal <4096M, 2 cores, 1 gpu>} and you set the capability override to + * {@literal <8192M, 0 cores, 0 gpu>}, then the actual resource allocation on + * the ResourceManager will be {@literal <8192M, 2 cores, 1 gpu>}. + * + * Note that the conversion from the ProfileCapability to the Resource class + * with the actual resource requirements will be done by the ResourceManager, + * which has the actual profile to Resource mapping. 
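+ * In code, the override from the example above would be written (as an
+ * illustrative sketch) as
+ * {@code ProfileCapability.newInstance("small", Resource.newInstance(8192, 0))}.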
+ * + * @param r Resource object containing the capability override + */ + public abstract void setProfileCapabilityOverride(Resource r); + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || !(other instanceof ProfileCapability)) { + return false; + } + return ((ProfileCapability) other).getProfileName() + .equals(this.getProfileName()) && ((ProfileCapability) other) + .getProfileCapabilityOverride() + .equals(this.getProfileCapabilityOverride()); + } + + @Override + public int hashCode() { + final int prime = 2153; + int result = 2459; + String name = getProfileName(); + Resource override = getProfileCapabilityOverride(); + result = prime * result + ((name == null) ? 0 : name.hashCode()); + result = prime * result + ((override == null) ? 0 : override.hashCode()); + return result; + } + + @Override + public String toString() { + return "{ profile: " + this.getProfileName() + ", capabilityOverride: " + + this.getProfileCapabilityOverride() + " }"; + } + + /** + * Get a representation of the capability as a Resource object. + * @param capability the capability we wish to convert + * @param resourceProfilesMap map of profile name to Resource object + * @return Resource object representing the capability + */ + public static Resource toResource(ProfileCapability capability, + Map resourceProfilesMap) { + Preconditions + .checkArgument(capability != null, "Capability cannot be null"); + Preconditions.checkArgument(resourceProfilesMap != null, + "Resource profiles map cannot be null"); + Resource none = Resource.newInstance(0, 0); + Resource resource = Resource.newInstance(0, 0); + String profileName = capability.getProfileName(); + if (profileName.isEmpty()) { + profileName = DEFAULT_PROFILE; + } + if (resourceProfilesMap.containsKey(profileName)) { + resource = Resource.newInstance(resourceProfilesMap.get(profileName)); + } + + if (capability.getProfileCapabilityOverride() != null && + !capability.getProfileCapabilityOverride().equals(none)) { + for (ResourceInformation entry : capability + .getProfileCapabilityOverride().getResources()) { + if (entry != null && entry.getValue() >= 0) { + resource.setResourceInformation(entry.getName(), entry); + } + } + } + return resource; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java index 0fd41a2f20b..acd0e6006be 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java @@ -18,12 +18,19 @@ package org.apache.hadoop.yarn.api.records; +import java.util.Arrays; + import org.apache.commons.lang.NotImplementedException; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; - +import org.apache.hadoop.yarn.api.records.impl.LightWeightResource; +import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; +import org.apache.hadoop.yarn.util.Records; +import 
org.apache.hadoop.yarn.util.resource.ResourceUtils; /** * Resource models a set of computer resources in the @@ -38,10 +45,10 @@ * the average number of threads it expects to have runnable at a time. * * Virtual cores take integer values and thus currently CPU-scheduling is - * very coarse. A complementary axis for CPU requests that represents processing - * power will likely be added in the future to enable finer-grained resource - * configuration. - * + * very coarse. A complementary axis for CPU requests that represents + * processing power will likely be added in the future to enable finer-grained + * resource configuration. + * + * Typically, applications request Resource of suitable * capability to run their component tasks.
* @@ -52,64 +59,84 @@ @Stable public abstract class Resource implements Comparable { - private static class SimpleResource extends Resource { - private long memory; - private long vcores; - SimpleResource(long memory, long vcores) { - this.memory = memory; - this.vcores = vcores; - } - @Override - public int getMemory() { - return (int)memory; - } - @Override - public void setMemory(int memory) { - this.memory = memory; - } - @Override - public long getMemorySize() { - return memory; - } - @Override - public void setMemorySize(long memory) { - this.memory = memory; - } - @Override - public int getVirtualCores() { - return (int)vcores; - } - @Override - public void setVirtualCores(int vcores) { - this.vcores = vcores; - } - } + protected ResourceInformation[] resources = null; + + // Number of mandatory resources, this is added to avoid invoke + // MandatoryResources.values().length, since values() internally will + // copy array, etc. + protected static final int NUM_MANDATORY_RESOURCES = 2; + + protected static final int MEMORY_INDEX = 0; + protected static final int VCORES_INDEX = 1; @Public @Stable public static Resource newInstance(int memory, int vCores) { - return new SimpleResource(memory, vCores); + if (ResourceUtils.getNumberOfKnownResourceTypes() > 2) { + Resource ret = Records.newRecord(Resource.class); + ret.setMemorySize(memory); + ret.setVirtualCores(vCores); + return ret; + } + return new LightWeightResource(memory, vCores); } @Public @Stable public static Resource newInstance(long memory, int vCores) { - return new SimpleResource(memory, vCores); + if (ResourceUtils.getNumberOfKnownResourceTypes() > 2) { + Resource ret = Records.newRecord(Resource.class); + ret.setMemorySize(memory); + ret.setVirtualCores(vCores); + return ret; + } + return new LightWeightResource(memory, vCores); + } + + @InterfaceAudience.Private + @InterfaceStability.Unstable + public static Resource newInstance(Resource resource) { + Resource ret = Resource.newInstance(resource.getMemorySize(), + resource.getVirtualCores()); + if (ResourceUtils.getNumberOfKnownResourceTypes() > 2) { + Resource.copy(resource, ret); + } + return ret; + } + + @InterfaceAudience.Private + @InterfaceStability.Unstable + public static void copy(Resource source, Resource dest) { + for (ResourceInformation entry : source.getResources()) { + dest.setResourceInformation(entry.getName(), entry); + } } /** * This method is DEPRECATED: * Use {@link Resource#getMemorySize()} instead * - * Get memory of the resource. - * @return memory of the resource + * Get memory of the resource. Note - while memory has + * never had a unit specified, all YARN configurations have specified memory + * in MB. The assumption has been that the daemons and applications are always + * using the same units. With the introduction of the ResourceInformation + * class we have support for units - so this function will continue to return + * memory but in the units of MB + * + * @return memory(in MB) of the resource */ @Public @Deprecated public abstract int getMemory(); /** - * Get memory of the resource. + * Get memory of the resource. Note - while memory has + * never had a unit specified, all YARN configurations have specified memory + * in MB. The assumption has been that the daemons and applications are always + * using the same units. 
With the introduction of the ResourceInformation + * class we have support for units - so this function will continue to return + * memory but in the units of MB + * * @return memory of the resource */ @Public @@ -120,8 +147,14 @@ public long getMemorySize() { } /** - * Set memory of the resource. - * @param memory memory of the resource + * Set memory of the resource. Note - while memory has + * never had a unit specified, all YARN configurations have specified memory + * in MB. The assumption has been that the daemons and applications are always + * using the same units. With the introduction of the ResourceInformation + * class we have support for units - so this function will continue to set + * memory but the assumption is that the value passed is in units of MB. + * + * @param memory memory(in MB) of the resource */ @Public @Deprecated @@ -138,72 +171,299 @@ public void setMemorySize(long memory) { "This method is implemented by ResourcePBImpl"); } - /** * Get number of virtual cpu cores of the resource. * * Virtual cores are a unit for expressing CPU parallelism. A node's capacity - * should be configured with virtual cores equal to its number of physical cores. - * A container should be requested with the number of cores it can saturate, i.e. - * the average number of threads it expects to have runnable at a time. - * + * should be configured with virtual cores equal to its number of physical + * cores. A container should be requested with the number of cores it can + * saturate, i.e. the average number of threads it expects to have runnable + * at a time. + * * @return num of virtual cpu cores of the resource */ @Public @Evolving public abstract int getVirtualCores(); - + /** * Set number of virtual cpu cores of the resource. * * Virtual cores are a unit for expressing CPU parallelism. A node's capacity - * should be configured with virtual cores equal to its number of physical cores. - * A container should be requested with the number of cores it can saturate, i.e. - * the average number of threads it expects to have runnable at a time. - * + * should be configured with virtual cores equal to its number of physical + * cores. A container should be requested with the number of cores it can + * saturate, i.e. the average number of threads it expects to have runnable + * at a time. + * * @param vCores number of virtual cpu cores of the resource */ @Public @Evolving public abstract void setVirtualCores(int vCores); - @Override - public int hashCode() { - final int prime = 263167; + /** + * Get ResourceInformation for all resources. + * + * @return Map of resource name to ResourceInformation + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public ResourceInformation[] getResources() { + return resources; + } - int result = (int) (939769357 - + getMemorySize()); // prime * result = 939769357 initially - result = prime * result + getVirtualCores(); - return result; + /** + * Get ResourceInformation for a specified resource. + * + * @param resource name of the resource + * @return the ResourceInformation object for the resource + * @throws ResourceNotFoundException if the resource can't be found + */ + @Public + @InterfaceStability.Unstable + public ResourceInformation getResourceInformation(String resource) + throws ResourceNotFoundException { + Integer index = ResourceUtils.getResourceTypeIndex().get(resource); + if (index != null) { + return resources[index]; + } + throw new ResourceNotFoundException("Unknown resource '" + resource + + "'. 
Known resources are " + Arrays.toString(resources)); + } + + /** + * Get ResourceInformation for a specified resource from a given index. + * + * @param index + * of the resource + * @return the ResourceInformation object for the resource + * @throws ResourceNotFoundException + * if the resource can't be found + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public ResourceInformation getResourceInformation(int index) + throws ResourceNotFoundException { + ResourceInformation ri = null; + try { + ri = resources[index]; + } catch (ArrayIndexOutOfBoundsException e) { + throwExceptionWhenArrayOutOfBound(index); + } + return ri; + } + + /** + * Get the value for a specified resource. No information about the units is + * returned. + * + * @param resource name of the resource + * @return the value for the resource + * @throws ResourceNotFoundException if the resource can't be found + */ + @Public + @InterfaceStability.Unstable + public long getResourceValue(String resource) + throws ResourceNotFoundException { + return getResourceInformation(resource).getValue(); + } + + /** + * Set the ResourceInformation object for a particular resource. + * + * @param resource the resource for which the ResourceInformation is provided + * @param resourceInformation ResourceInformation object + * @throws ResourceNotFoundException if the resource is not found + */ + @Public + @InterfaceStability.Unstable + public void setResourceInformation(String resource, + ResourceInformation resourceInformation) + throws ResourceNotFoundException { + if (resource.equals(ResourceInformation.MEMORY_URI)) { + this.setMemorySize(resourceInformation.getValue()); + return; + } + if (resource.equals(ResourceInformation.VCORES_URI)) { + this.setVirtualCores((int) resourceInformation.getValue()); + return; + } + ResourceInformation storedResourceInfo = getResourceInformation(resource); + ResourceInformation.copy(resourceInformation, storedResourceInfo); + } + + /** + * Set the ResourceInformation object for a particular resource. + * + * @param index + * the resource index for which the ResourceInformation is provided + * @param resourceInformation + * ResourceInformation object + * @throws ResourceNotFoundException + * if the resource is not found + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public void setResourceInformation(int index, + ResourceInformation resourceInformation) + throws ResourceNotFoundException { + if (index < 0 || index >= resources.length) { + throw new ResourceNotFoundException("Unknown resource at index '" + index + + "'. Valid resources are " + Arrays.toString(resources)); + } + ResourceInformation.copy(resourceInformation, resources[index]); + } + + /** + * Set the value of a resource in the ResourceInformation object. The unit of + * the value is assumed to be the one in the ResourceInformation object. + * + * @param resource the resource for which the value is provided. 
+ * @param value the value to set + * @throws ResourceNotFoundException if the resource is not found + */ + @Public + @InterfaceStability.Unstable + public void setResourceValue(String resource, long value) + throws ResourceNotFoundException { + if (resource.equals(ResourceInformation.MEMORY_URI)) { + this.setMemorySize(value); + return; + } + if (resource.equals(ResourceInformation.VCORES_URI)) { + this.setVirtualCores((int)value); + return; + } + + ResourceInformation storedResourceInfo = getResourceInformation(resource); + storedResourceInfo.setValue(value); + } + + /** + * Set the value of a resource in the ResourceInformation object. The unit of + * the value is assumed to be the one in the ResourceInformation object. + * + * @param index + * the resource index for which the value is provided. + * @param value + * the value to set + * @throws ResourceNotFoundException + * if the resource is not found + */ + @InterfaceAudience.Private + @InterfaceStability.Unstable + public void setResourceValue(int index, long value) + throws ResourceNotFoundException { + try { + resources[index].setValue(value); + } catch (ArrayIndexOutOfBoundsException e) { + throwExceptionWhenArrayOutOfBound(index); + } + } + + private void throwExceptionWhenArrayOutOfBound(int index) { + String exceptionMsg = String.format( + "Trying to access ResourceInformation for given index=%d. " + + "Acceptable index range is [0,%d), please check double check " + + "configured resources in resource-types.xml", + index, ResourceUtils.getNumberOfKnownResourceTypes()); + + throw new ResourceNotFoundException(exceptionMsg); } @Override public boolean equals(Object obj) { - if (this == obj) + if (this == obj) { return true; - if (obj == null) + } + if (obj == null) { return false; - if (!(obj instanceof Resource)) + } + if (!(obj instanceof Resource)) { return false; + } Resource other = (Resource) obj; - if (getMemorySize() != other.getMemorySize() || - getVirtualCores() != other.getVirtualCores()) { + + ResourceInformation[] otherVectors = other.getResources(); + + if (resources.length != otherVectors.length) { return false; } + + for (int i = 0; i < resources.length; i++) { + ResourceInformation a = resources[i]; + ResourceInformation b = otherVectors[i]; + if ((a != b) && ((a == null) || !a.equals(b))) { + return false; + } + } return true; } @Override public int compareTo(Resource other) { - long diff = this.getMemorySize() - other.getMemorySize(); - if (diff == 0) { - diff = this.getVirtualCores() - other.getVirtualCores(); + ResourceInformation[] otherResources = other.getResources(); + + int arrLenThis = this.resources.length; + int arrLenOther = otherResources.length; + + // compare memory and vcores first(in that order) to preserve + // existing behaviour + for (int i = 0; i < arrLenThis; i++) { + ResourceInformation otherEntry; + try { + otherEntry = otherResources[i]; + } catch (ArrayIndexOutOfBoundsException e) { + // For two vectors with different size and same prefix. Shorter vector + // goes first. + return 1; + } + ResourceInformation entry = resources[i]; + + long diff = entry.compareTo(otherEntry); + if (diff > 0) { + return 1; + } else if (diff < 0) { + return -1; + } } - return diff == 0 ? 0 : (diff > 0 ? 
1 : -1); + + if (arrLenThis < arrLenOther) { + return -1; + } + + return 0; } @Override public String toString() { - return ""; + StringBuilder sb = new StringBuilder(); + + sb.append(""); + return sb.toString(); + } + + @Override + public int hashCode() { + final int prime = 47; + long result = 0; + for (ResourceInformation entry : resources) { + result = prime * result + entry.hashCode(); + } + return (int) result; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java new file mode 100644 index 00000000000..2a040948d58 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceInformation.java @@ -0,0 +1,291 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.curator.shaded.com.google.common.reflect.ClassPath; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; + +/** + * Class to encapsulate information about a Resource - the name of the resource, + * the units(milli, micro, etc), the type(countable), and the value. + */ +public class ResourceInformation implements Comparable { + + private String name; + private String units; + private ResourceTypes resourceType; + private long value; + private long minimumAllocation; + private long maximumAllocation; + + public static final String MEMORY_URI = "memory-mb"; + public static final String VCORES_URI = "vcores"; + + public static final ResourceInformation MEMORY_MB = + ResourceInformation.newInstance(MEMORY_URI, "Mi"); + public static final ResourceInformation VCORES = + ResourceInformation.newInstance(VCORES_URI); + + /** + * Get the name for the resource. + * + * @return resource name + */ + public String getName() { + return name; + } + + /** + * Set the name for the resource. + * + * @param rName name for the resource + */ + public void setName(String rName) { + this.name = rName; + } + + /** + * Get units for the resource. + * + * @return units for the resource + */ + public String getUnits() { + return units; + } + + /** + * Set the units for the resource. + * + * @param rUnits units for the resource + */ + public void setUnits(String rUnits) { + if (!UnitsConversionUtil.KNOWN_UNITS.contains(rUnits)) { + throw new IllegalArgumentException( + "Unknown unit '" + rUnits + "'. 
Known units are " + + UnitsConversionUtil.KNOWN_UNITS); + } + this.units = rUnits; + } + + /** + * Checking if a unit included by KNOWN_UNITS is an expensive operation. This + * can be avoided in critical path in RM. + * @param rUnits units for the resource + */ + @InterfaceAudience.Private + public void setUnitsWithoutValidation(String rUnits) { + this.units = rUnits; + } + + /** + * Get the resource type. + * + * @return the resource type + */ + public ResourceTypes getResourceType() { + return resourceType; + } + + /** + * Set the resource type. + * + * @param type the resource type + */ + public void setResourceType(ResourceTypes type) { + this.resourceType = type; + } + + /** + * Get the value for the resource. + * + * @return the resource value + */ + public long getValue() { + return value; + } + + /** + * Set the value for the resource. + * + * @param rValue the resource value + */ + public void setValue(long rValue) { + this.value = rValue; + } + + /** + * Get the minimum allocation for the resource. + * + * @return the minimum allocation for the resource + */ + public long getMinimumAllocation() { + return minimumAllocation; + } + + /** + * Set the minimum allocation for the resource. + * + * @param minimumAllocation the minimum allocation for the resource + */ + public void setMinimumAllocation(long minimumAllocation) { + this.minimumAllocation = minimumAllocation; + } + + /** + * Get the maximum allocation for the resource. + * + * @return the maximum allocation for the resource + */ + public long getMaximumAllocation() { + return maximumAllocation; + } + + /** + * Set the maximum allocation for the resource. + * + * @param maximumAllocation the maximum allocation for the resource + */ + public void setMaximumAllocation(long maximumAllocation) { + this.maximumAllocation = maximumAllocation; + } + + /** + * Create a new instance of ResourceInformation from another object. + * + * @param other the object from which the new object should be created + * @return the new ResourceInformation object + */ + public static ResourceInformation newInstance(ResourceInformation other) { + ResourceInformation ret = new ResourceInformation(); + copy(other, ret); + return ret; + } + + public static ResourceInformation newInstance(String name, String units, + long value, ResourceTypes type, long minimumAllocation, + long maximumAllocation) { + ResourceInformation ret = new ResourceInformation(); + ret.setName(name); + ret.setResourceType(type); + ret.setUnits(units); + ret.setValue(value); + ret.setMinimumAllocation(minimumAllocation); + ret.setMaximumAllocation(maximumAllocation); + return ret; + } + + public static ResourceInformation newInstance(String name, String units, + long value) { + return ResourceInformation + .newInstance(name, units, value, ResourceTypes.COUNTABLE, 0L, + Long.MAX_VALUE); + } + + public static ResourceInformation newInstance(String name, String units) { + return ResourceInformation + .newInstance(name, units, 0L, ResourceTypes.COUNTABLE, 0L, + Long.MAX_VALUE); + } + + public static ResourceInformation newInstance(String name, long value) { + return ResourceInformation + .newInstance(name, "", value, ResourceTypes.COUNTABLE, 0L, + Long.MAX_VALUE); + } + + public static ResourceInformation newInstance(String name) { + return ResourceInformation.newInstance(name, ""); + } + + /** + * Copies the content of the source ResourceInformation object to the + * destination object, overwriting all properties of the destination object. 
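A short sketch of the factory methods above (not part of the patch; the "gpu" resource name is only an example):

    import org.apache.hadoop.yarn.api.records.ResourceInformation;

    public class ResourceInformationSketch {
      public static void main(String[] args) {
        // A hypothetical countable resource named "gpu" with no unit.
        ResourceInformation gpu = ResourceInformation.newInstance("gpu", 2L);
        System.out.println(gpu);

        // The same amount of memory expressed in two different binary units.
        // equals() falls back to UnitsConversionUtil when the units differ,
        // so these two are considered equal.
        ResourceInformation memInMi = ResourceInformation
            .newInstance(ResourceInformation.MEMORY_URI, "Mi", 1024L);
        ResourceInformation memInGi = ResourceInformation
            .newInstance(ResourceInformation.MEMORY_URI, "Gi", 1L);
        System.out.println(memInMi.equals(memInGi)); // true

        // copy() overwrites every field of the destination object.
        ResourceInformation copyOfGpu = ResourceInformation.newInstance(gpu);
        System.out.println(copyOfGpu);
      }
    }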
+ * @param src Source ResourceInformation object + * @param dst Destination ResourceInformation object + */ + + public static void copy(ResourceInformation src, ResourceInformation dst) { + dst.setName(src.getName()); + dst.setResourceType(src.getResourceType()); + dst.setUnits(src.getUnits()); + dst.setValue(src.getValue()); + dst.setMinimumAllocation(src.getMinimumAllocation()); + dst.setMaximumAllocation(src.getMaximumAllocation()); + } + + @Override + public String toString() { + return "name: " + this.name + ", units: " + this.units + ", type: " + + resourceType + ", value: " + value + ", minimum allocation: " + + minimumAllocation + ", maximum allocation: " + maximumAllocation; + } + + public String getShorthandRepresentation() { + return "" + this.value + this.units; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (!(obj instanceof ResourceInformation)) { + return false; + } + ResourceInformation r = (ResourceInformation) obj; + if (!this.name.equals(r.getName()) + || !this.resourceType.equals(r.getResourceType())) { + return false; + } + if (this.units.equals(r.units)) { + return this.value == r.value; + } + return (UnitsConversionUtil.compare(this.units, this.value, r.units, + r.value) == 0); + } + + @Override + public int hashCode() { + final int prime = 263167; + int result = + 939769357 + name.hashCode(); // prime * result = 939769357 initially + result = prime * result + resourceType.hashCode(); + result = prime * result + units.hashCode(); + result = prime * result + Long.hashCode(value); + return result; + } + + @Override + public int compareTo(ResourceInformation other) { + int diff = this.name.compareTo(other.name); + if (diff == 0) { + diff = UnitsConversionUtil + .compare(this.units, this.value, other.units, other.value); + if (diff == 0) { + diff = this.resourceType.compareTo(other.resourceType); + } + } + return diff; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java index 5bedc879ee3..21fa15f14aa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceRequest.java @@ -21,6 +21,7 @@ import java.io.Serializable; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; @@ -98,7 +99,22 @@ public static ResourceRequest newInstance(Priority priority, String hostName, .resourceName(hostName).capability(capability) .numContainers(numContainers).relaxLocality(relaxLocality) .nodeLabelExpression(labelExpression) - .executionTypeRequest(executionTypeRequest).build(); + .executionTypeRequest(executionTypeRequest).profileCapability(null) + .build(); + } + + @Public + @Unstable + public static ResourceRequest newInstance(Priority priority, String hostName, + Resource capability, int numContainers, boolean relaxLocality, + String labelExpression, ExecutionTypeRequest executionTypeRequest, + ProfileCapability profile) { + return 
ResourceRequest.newBuilder().priority(priority) + .resourceName(hostName).capability(capability) + .numContainers(numContainers).relaxLocality(relaxLocality) + .nodeLabelExpression(labelExpression) + .executionTypeRequest(executionTypeRequest).profileCapability(profile) + .build(); } @Public @@ -124,6 +140,7 @@ private ResourceRequestBuilder() { resourceRequest.setRelaxLocality(true); resourceRequest.setExecutionTypeRequest( ExecutionTypeRequest.newInstance()); + resourceRequest.setProfileCapability(null); } /** @@ -237,6 +254,21 @@ public ResourceRequestBuilder allocationRequestId( return this; } + /** + * Set the resourceProfile of the request. + * @see ResourceRequest#setProfileCapability(ProfileCapability) + * @param profileCapability + * profileCapability of the request + * @return {@link ResourceRequestBuilder} + */ + @Public + @InterfaceStability.Unstable + public ResourceRequestBuilder profileCapability( + ProfileCapability profileCapability) { + resourceRequest.setProfileCapability(profileCapability); + return this; + } + /** * Return generated {@link ResourceRequest} object. * @return {@link ResourceRequest} @@ -454,6 +486,14 @@ public ExecutionTypeRequest getExecutionTypeRequest() { @Evolving public abstract void setNodeLabelExpression(String nodelabelExpression); + @Public + @InterfaceStability.Unstable + public abstract ProfileCapability getProfileCapability(); + + @Public + @InterfaceStability.Unstable + public abstract void setProfileCapability(ProfileCapability p); + /** * Get the optional ID corresponding to this allocation request. This * ID is an identifier for different {@code ResourceRequest}s from the same @@ -529,12 +569,14 @@ public int hashCode() { Resource capability = getCapability(); String hostName = getResourceName(); Priority priority = getPriority(); + ProfileCapability profile = getProfileCapability(); result = prime * result + ((capability == null) ? 0 : capability.hashCode()); result = prime * result + ((hostName == null) ? 0 : hostName.hashCode()); result = prime * result + getNumContainers(); result = prime * result + ((priority == null) ? 0 : priority.hashCode()); result = prime * result + Long.valueOf(getAllocationRequestId()).hashCode(); + result = prime * result + ((profile == null) ? 0 : profile.hashCode()); return result; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceTypeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceTypeInfo.java new file mode 100644 index 00000000000..b6f7f147658 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceTypeInfo.java @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.util.Records; + +/** + * Class to encapsulate information about a ResourceType - the name of the + * resource, the units(milli, micro, etc), the type(countable). + */ +public abstract class ResourceTypeInfo implements Comparable { + + /** + * Get the name for the resource. + * + * @return resource name + */ + public abstract String getName(); + + /** + * Set the name for the resource. + * + * @param rName + * name for the resource + */ + public abstract void setName(String rName); + + /** + * Get units for the resource. + * + * @return units for the resource + */ + public abstract String getDefaultUnit(); + + /** + * Set the units for the resource. + * + * @param rUnits + * units for the resource + */ + public abstract void setDefaultUnit(String rUnits); + + /** + * Get the resource type. + * + * @return the resource type + */ + public abstract ResourceTypes getResourceType(); + + /** + * Set the resource type. + * + * @param type + * the resource type + */ + public abstract void setResourceType(ResourceTypes type); + + /** + * Create a new instance of ResourceTypeInfo from another object. + * + * @param other + * the object from which the new object should be created + * @return the new ResourceTypeInfo object + */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static ResourceTypeInfo newInstance(ResourceTypeInfo other) { + ResourceTypeInfo resourceType = Records.newRecord(ResourceTypeInfo.class); + copy(other, resourceType); + return resourceType; + } + + /** + * Create a new instance of ResourceTypeInfo from name, units and type. + * + * @param name name of resource type + * @param units units of resource type + * @param type such as countable, etc. + * @return the new ResourceTypeInfo object + */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static ResourceTypeInfo newInstance(String name, String units, + ResourceTypes type) { + ResourceTypeInfo resourceType = Records.newRecord(ResourceTypeInfo.class); + resourceType.setName(name); + resourceType.setResourceType(type); + resourceType.setDefaultUnit(units); + return resourceType; + } + + /** + * Create a new instance of ResourceTypeInfo from name, units. + * + * @param name name of resource type + * @param units units of resource type + * @return the new ResourceTypeInfo object + */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static ResourceTypeInfo newInstance(String name, String units) { + return ResourceTypeInfo.newInstance(name, units, ResourceTypes.COUNTABLE); + } + + /** + * Create a new instance of ResourceTypeInfo from name. + * + * @param name name of resource type + * @return the new ResourceTypeInfo object + */ + @InterfaceAudience.Public + @InterfaceStability.Unstable + public static ResourceTypeInfo newInstance(String name) { + return ResourceTypeInfo.newInstance(name, ""); + } + + /** + * Copies the content of the source ResourceTypeInfo object to the + * destination object, overwriting all properties of the destination object. 
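For comparison with ResourceInformation above, a minimal sketch of creating ResourceTypeInfo records (not part of the patch; it assumes the protobuf-backed implementation resolved by Records.newRecord is available, and the "gpu" type is only an example):

    import org.apache.hadoop.yarn.api.records.ResourceInformation;
    import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;

    public class ResourceTypeInfoSketch {
      public static void main(String[] args) {
        // The two mandatory types, using the URI constants defined earlier in
        // this patch; the "Mi" unit for memory is shown purely for illustration.
        ResourceTypeInfo memory =
            ResourceTypeInfo.newInstance(ResourceInformation.MEMORY_URI, "Mi");
        ResourceTypeInfo vcores =
            ResourceTypeInfo.newInstance(ResourceInformation.VCORES_URI);

        // An additional, hypothetical type; COUNTABLE is the default.
        ResourceTypeInfo gpu = ResourceTypeInfo.newInstance("gpu");

        System.out.println(memory.getName() + ", " + vcores.getName()
            + ", " + gpu.getName());
      }
    }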
+ * + * @param src + * Source ResourceTypeInfo object + * @param dst + * Destination ResourceTypeInfo object + */ + + public static void copy(ResourceTypeInfo src, ResourceTypeInfo dst) { + dst.setName(src.getName()); + dst.setResourceType(src.getResourceType()); + dst.setDefaultUnit(src.getDefaultUnit()); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(""); + return sb.toString(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (!(obj instanceof ResourceTypeInfo)) { + return false; + } + ResourceTypeInfo r = (ResourceTypeInfo) obj; + return this.getName().equals(r.getName()) + && this.getResourceType().equals(r.getResourceType()) + && this.getDefaultUnit().equals(r.getDefaultUnit()); + } + + @Override + public int hashCode() { + final int prime = 47; + int result = prime + getName().hashCode(); + result = prime * result + getResourceType().hashCode(); + return result; + } + + @Override + public int compareTo(ResourceTypeInfo other) { + int diff = this.getName().compareTo(other.getName()); + if (diff == 0) { + diff = this.getDefaultUnit().compareTo(other.getDefaultUnit()); + if (diff == 0) { + diff = this.getResourceType().compareTo(other.getResourceType()); + } + } + return diff; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java new file mode 100644 index 00000000000..b80e13388cf --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/LightWeightResource.java @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records.impl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; + +import static org.apache.hadoop.yarn.api.records.ResourceInformation.MEMORY_MB; +import static org.apache.hadoop.yarn.api.records.ResourceInformation.MEMORY_URI; +import static org.apache.hadoop.yarn.api.records.ResourceInformation.VCORES_URI; + +/** + *

+ * LightWeightResource extends Resource to handle base resources such + * as memory and CPU. + * TODO: We have a long term plan to use AbstractResource when additional + * resource types are to be handled as well. + * This will be used to speed up internal calculation to avoid creating + * costly PB-backed Resource object: ResourcePBImpl + * + * + * Currently it models both memory and CPU. + * + * + * The unit for memory is megabytes. CPU is modeled with virtual cores (vcores), + * a unit for expressing parallelism. A node's capacity should be configured + * with virtual cores equal to its number of physical cores. A container should + * be requested with the number of cores it can saturate, i.e. the average + * number of threads it expects to have runnable at a time. + * + * + * Virtual cores take integer values and thus currently CPU-scheduling is very + * coarse. A complementary axis for CPU requests that represents processing + * power will likely be added in the future to enable finer-grained resource + * configuration. + *

+ * + * @see Resource + */ +@InterfaceAudience.Private +@Unstable +public class LightWeightResource extends Resource { + + private ResourceInformation memoryResInfo; + private ResourceInformation vcoresResInfo; + + public LightWeightResource(long memory, long vcores) { + this.memoryResInfo = LightWeightResource.newDefaultInformation(MEMORY_URI, + MEMORY_MB.getUnits(), memory); + this.vcoresResInfo = LightWeightResource.newDefaultInformation(VCORES_URI, + "", vcores); + + resources = new ResourceInformation[NUM_MANDATORY_RESOURCES]; + resources[MEMORY_INDEX] = memoryResInfo; + resources[VCORES_INDEX] = vcoresResInfo; + } + + private static ResourceInformation newDefaultInformation(String name, + String unit, long value) { + ResourceInformation ri = new ResourceInformation(); + ri.setName(name); + ri.setValue(value); + ri.setResourceType(ResourceTypes.COUNTABLE); + ri.setUnitsWithoutValidation(unit); + ri.setMinimumAllocation(0); + ri.setMaximumAllocation(Long.MAX_VALUE); + return ri; + } + + @Override + @SuppressWarnings("deprecation") + public int getMemory() { + return (int) memoryResInfo.getValue(); + } + + @Override + @SuppressWarnings("deprecation") + public void setMemory(int memory) { + this.memoryResInfo.setValue(memory); + } + + @Override + public long getMemorySize() { + return memoryResInfo.getValue(); + } + + @Override + public void setMemorySize(long memory) { + this.memoryResInfo.setValue(memory); + } + + @Override + public int getVirtualCores() { + return (int) vcoresResInfo.getValue(); + } + + @Override + public void setVirtualCores(int vcores) { + this.vcoresResInfo.setValue(vcores); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || !(obj instanceof Resource)) { + return false; + } + Resource other = (Resource) obj; + if (getMemorySize() != other.getMemorySize() + || getVirtualCores() != other.getVirtualCores()) { + return false; + } + + return true; + } + + @Override + public int compareTo(Resource other) { + // compare memory and vcores first(in that order) to preserve + // existing behaviour + long diff = this.getMemorySize() - other.getMemorySize(); + if (diff == 0) { + return this.getVirtualCores() - other.getVirtualCores(); + } else if (diff > 0){ + return 1; + } else { + return -1; + } + } + + @Override + public int hashCode() { + final int prime = 47; + long result = prime + getMemorySize(); + result = prime * result + getVirtualCores(); + + return (int) result; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/package-info.java new file mode 100644 index 00000000000..b2420bc50f8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Package org.apache.hadoop.yarn.api.records.impl contains classes + * which define basic resources. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +package org.apache.hadoop.yarn.api.records.impl; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 27ca9572f8e..114453f6dc9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -60,9 +60,28 @@ public class YarnConfiguration extends Configuration { @Private public static final String CORE_SITE_CONFIGURATION_FILE = "core-site.xml"; + @Private + public static final String RESOURCE_TYPES_CONFIGURATION_FILE = + "resource-types.xml"; + + @Private + public static final String NODE_RESOURCES_CONFIGURATION_FILE = + "node-resources.xml"; + @Private public static final List RM_CONFIGURATION_FILES = Collections.unmodifiableList(Arrays.asList( + RESOURCE_TYPES_CONFIGURATION_FILE, + DR_CONFIGURATION_FILE, + CS_CONFIGURATION_FILE, + HADOOP_POLICY_CONFIGURATION_FILE, + YARN_SITE_CONFIGURATION_FILE, + CORE_SITE_CONFIGURATION_FILE)); + + @Private + public static final List NM_CONFIGURATION_FILES = + Collections.unmodifiableList(Arrays.asList( + NODE_RESOURCES_CONFIGURATION_FILE, DR_CONFIGURATION_FILE, CS_CONFIGURATION_FILE, HADOOP_POLICY_CONFIGURATION_FILE, @@ -106,6 +125,16 @@ private static void addDeprecatedKeys() { public static final String YARN_PREFIX = "yarn."; + ///////////////////////////// + // Resource types configs + //////////////////////////// + + public static final String RESOURCE_TYPES = + YarnConfiguration.YARN_PREFIX + "resource-types"; + + public static final String NM_RESOURCES_PREFIX = + YarnConfiguration.NM_PREFIX + "resource-type."; + /** Delay before deleting resource to ease debugging of NM issues */ public static final String DEBUG_NM_DELETE_DELAY_SEC = YarnConfiguration.NM_PREFIX + "delete.debug-delay-sec"; @@ -855,6 +884,29 @@ public static boolean isAclEnabled(Configuration conf) { */ public static final String RM_PROXY_USER_PREFIX = RM_PREFIX + "proxyuser."; + /** + * Enable/disable resource profiles. + */ + @Public + @Unstable + public static final String RM_RESOURCE_PROFILES_ENABLED = + RM_PREFIX + "resource-profiles.enabled"; + @Public + @Unstable + public static final boolean DEFAULT_RM_RESOURCE_PROFILES_ENABLED = false; + + /** + * File containing resource profiles. 
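A hedged sketch of wiring these keys up programmatically (not part of the patch; declaring extra resource types normally happens in resource-types.xml, and the value format and the "gpu" type shown here are assumptions for illustration):

    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class ResourceProfilesConfigSketch {
      public static void main(String[] args) {
        YarnConfiguration conf = new YarnConfiguration();

        // Assumed format: a comma-separated list of additional resource types.
        conf.set(YarnConfiguration.RESOURCE_TYPES, "gpu");

        // Enable resource profiles on the RM and point it at the profiles file.
        conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true);
        conf.set(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE,
            YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_SOURCE_FILE);

        System.out.println(
            conf.get(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE));
      }
    }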
+ */ + @Public + @Unstable + public static final String RM_RESOURCE_PROFILES_SOURCE_FILE = + RM_PREFIX + "resource-profiles.source-file"; + @Public + @Unstable + public static final String DEFAULT_RM_RESOURCE_PROFILES_SOURCE_FILE = + "resource-profiles.json"; + /** * Timeout in seconds for YARN node graceful decommission. * This is the maximal time to wait for running containers and applications @@ -916,9 +968,13 @@ public static boolean isAclEnabled(Configuration conf) { NM_PREFIX + "bind-host"; /** who will execute(launch) the containers.*/ - public static final String NM_CONTAINER_EXECUTOR = + public static final String NM_CONTAINER_EXECUTOR = NM_PREFIX + "container-executor.class"; + /** List of container state transition listeners.*/ + public static final String NM_CONTAINER_STATE_TRANSITION_LISTENERS = + NM_PREFIX + "container-state-transition-listener.classes"; + /** * Adjustment to make to the container os scheduling priority. * The valid values for this could vary depending on the platform. @@ -1036,6 +1092,15 @@ public static boolean isAclEnabled(Configuration conf) { NM_PREFIX + "container-retry-minimum-interval-ms"; public static final int DEFAULT_NM_CONTAINER_RETRY_MINIMUM_INTERVAL_MS = 1000; + /** + * Use container pause as the preemption policy over kill in the container + * queue at a NodeManager. + **/ + public static final String NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION = + NM_PREFIX + "opportunistic-containers-use-pause-for-preemption"; + public static final boolean + DEFAULT_NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION = false; + /** Interval at which the delayed token removal thread runs */ public static final String RM_DELAYED_DELEGATION_TOKEN_REMOVAL_INTERVAL_MS = RM_PREFIX + "delayed.delegation-token.removal-interval-ms"; @@ -1470,6 +1535,23 @@ public static boolean isAclEnabled(Configuration conf) { /** Prefix for runtime configuration constants. */ public static final String LINUX_CONTAINER_RUNTIME_PREFIX = NM_PREFIX + "runtime.linux."; + + /** + * Comma separated list of runtimes that are allowed when using + * LinuxContainerExecutor. The allowed values are: + *
    + *
+ *   - default + *   - docker + *   - javasandbox + *
+ */ + public static final String LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES = + LINUX_CONTAINER_RUNTIME_PREFIX + "allowed-runtimes"; + + /** The default list of allowed runtimes when using LinuxContainerExecutor. */ + public static final String[] DEFAULT_LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES + = {"default"}; + public static final String DOCKER_CONTAINER_RUNTIME_PREFIX = LINUX_CONTAINER_RUNTIME_PREFIX + "docker."; @@ -3089,6 +3171,14 @@ public static boolean areNodeLabelsEnabled( public static final String NM_SCRIPT_BASED_NODE_LABELS_PROVIDER_SCRIPT_OPTS = NM_SCRIPT_BASED_NODE_LABELS_PROVIDER_PREFIX + "opts"; + /* + * Support to view apps for given user in secure cluster. + */ + public static final String DISPLAY_APPS_FOR_LOGGED_IN_USER = + RM_PREFIX + "display.per-user-apps"; + public static final boolean DEFAULT_DISPLAY_APPS_FOR_LOGGED_IN_USER = + false; + // RM and NM CSRF props public static final String REST_CSRF = "webapp.rest-csrf."; public static final String RM_CSRF_PREFIX = RM_PREFIX + REST_CSRF; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java similarity index 57% rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java index 2746af496c2..b5fece7dc8c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceNotFoundException.java @@ -16,44 +16,30 @@ * limitations under the License. */ -package org.apache.hadoop.fs.s3native; +package org.apache.hadoop.yarn.exceptions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; /** - *

- * Holds basic metadata for a file stored in a {@link NativeFileSystemStore}. - *

+ * This exception is thrown when details of an unknown resource type + * are requested. */ -@InterfaceAudience.Private +@InterfaceAudience.Public @InterfaceStability.Unstable -class FileMetadata { - private final String key; - private final long length; - private final long lastModified; - - public FileMetadata(String key, long length, long lastModified) { - this.key = key; - this.length = length; - this.lastModified = lastModified; - } - - public String getKey() { - return key; - } - - public long getLength() { - return length; +public class ResourceNotFoundException extends YarnRuntimeException { + + private static final long serialVersionUID = 10081982L; + + public ResourceNotFoundException(String message) { + super(message); } - public long getLastModified() { - return lastModified; + public ResourceNotFoundException(Throwable cause) { + super(cause); } - - @Override - public String toString() { - return "FileMetadata[" + key + ", " + length + ", " + lastModified + "]"; + + public ResourceNotFoundException(String message, Throwable cause) { + super(message, cause); } - } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesClassicOutput.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceProfilesNotEnabledException.java similarity index 55% rename from hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesClassicOutput.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceProfilesNotEnabledException.java index 551956bd8d2..558e075bea5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesClassicOutput.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ResourceProfilesNotEnabledException.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -16,26 +16,28 @@ * limitations under the License. */ -package org.apache.hadoop.fs.s3a.scale; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.s3a.Constants; +package org.apache.hadoop.yarn.exceptions; /** - * Use classic output for writing things; tweaks the configuration to do - * this after it has been set up in the superclass. - * The generator test has been copied and re + * This exception is thrown when the client requests information about + * ResourceProfiles in the + * {@link org.apache.hadoop.yarn.api.ApplicationClientProtocol} but resource + * profiles is not enabled on the RM. 
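A minimal sketch of the guard this exception is meant for (not part of the patch; checkProfilesEnabled is a hypothetical helper, not an RM method):

    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.exceptions.ResourceProfilesNotEnabledException;
    import org.apache.hadoop.yarn.exceptions.YarnException;

    public class ResourceProfilesGuardSketch {
      // Hypothetical guard that a profile-aware request handler might apply.
      static void checkProfilesEnabled(boolean profilesEnabled)
          throws YarnException {
        if (!profilesEnabled) {
          throw new ResourceProfilesNotEnabledException(
              "Resource profiles are not enabled. Please set "
                  + YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED + " to true");
        }
      }

      public static void main(String[] args) {
        try {
          checkProfilesEnabled(false);
        } catch (YarnException e) {
          System.out.println("Caught: " + e.getMessage());
        }
      }
    }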
+ * */ -public class ITestS3AHugeFilesClassicOutput extends AbstractSTestS3AHugeFiles { +public class ResourceProfilesNotEnabledException extends YarnException { - @Override - protected Configuration createScaleConfiguration() { - final Configuration conf = super.createScaleConfiguration(); - conf.setBoolean(Constants.FAST_UPLOAD, false); - return conf; + private static final long serialVersionUID = 13498237L; + + public ResourceProfilesNotEnabledException(Throwable cause) { + super(cause); } - protected String getBlockOutputBufferName() { - return "classic"; + public ResourceProfilesNotEnabledException(String message) { + super(message); + } + + public ResourceProfilesNotEnabledException(String message, Throwable cause) { + super(message, cause); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/YARNFeatureNotEnabledException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/YARNFeatureNotEnabledException.java new file mode 100644 index 00000000000..62340fea363 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/YARNFeatureNotEnabledException.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.exceptions; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * This exception is thrown when a feature is being used which is not enabled + * yet. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class YARNFeatureNotEnabledException extends YarnException { + private static final long serialVersionUID = 898023752676L; + + public YARNFeatureNotEnabledException(Throwable cause) { + super(cause); + } + + public YARNFeatureNotEnabledException(String message) { + super(message); + } + + public YARNFeatureNotEnabledException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/UnitsConversionUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/UnitsConversionUtil.java new file mode 100644 index 00000000000..7a212e163d9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/UnitsConversionUtil.java @@ -0,0 +1,221 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import java.math.BigInteger; +import java.util.*; + +/** + * A util to convert values in one unit to another. Units refers to whether + * the value is expressed in pico, nano, etc. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class UnitsConversionUtil { + + /** + * Helper class for encapsulating conversion values. + */ + public static class Converter { + private long numerator; + private long denominator; + + Converter(long n, long d) { + this.numerator = n; + this.denominator = d; + } + } + + private static final String[] UNITS = {"p", "n", "u", "m", "", "k", "M", "G", + "T", "P", "Ki", "Mi", "Gi", "Ti", "Pi"}; + private static final List SORTED_UNITS = Arrays.asList(UNITS); + public static final Set KNOWN_UNITS = createKnownUnitsSet(); + private static final Converter PICO = + new Converter(1L, 1000L * 1000L * 1000L * 1000L); + private static final Converter NANO = + new Converter(1L, 1000L * 1000L * 1000L); + private static final Converter MICRO = new Converter(1L, 1000L * 1000L); + private static final Converter MILLI = new Converter(1L, 1000L); + private static final Converter BASE = new Converter(1L, 1L); + private static final Converter KILO = new Converter(1000L, 1L); + private static final Converter MEGA = new Converter(1000L * 1000L, 1L); + private static final Converter GIGA = + new Converter(1000L * 1000L * 1000L, 1L); + private static final Converter TERA = + new Converter(1000L * 1000L * 1000L * 1000L, 1L); + private static final Converter PETA = + new Converter(1000L * 1000L * 1000L * 1000L * 1000L, 1L); + + private static final Converter KILO_BINARY = new Converter(1024L, 1L); + private static final Converter MEGA_BINARY = new Converter(1024L * 1024L, 1L); + private static final Converter GIGA_BINARY = + new Converter(1024L * 1024L * 1024L, 1L); + private static final Converter TERA_BINARY = + new Converter(1024L * 1024L * 1024L * 1024L, 1L); + private static final Converter PETA_BINARY = + new Converter(1024L * 1024L * 1024L * 1024L * 1024L, 1L); + + private static Set createKnownUnitsSet() { + Set ret = new HashSet<>(); + ret.addAll(Arrays.asList(UNITS)); + return ret; + } + + private static Converter getConverter(String unit) { + switch (unit) { + case "p": + return PICO; + case "n": + return NANO; + case "u": + return MICRO; + case "m": + return MILLI; + case "": + return BASE; + case "k": + return KILO; + case "M": + return MEGA; + case "G": + return GIGA; + case "T": + return TERA; + case "P": + return PETA; + case "Ki": + return KILO_BINARY; + case "Mi": + return MEGA_BINARY; + case "Gi": + return GIGA_BINARY; + case "Ti": + return TERA_BINARY; + case "Pi": + return PETA_BINARY; + default: + throw new IllegalArgumentException( + "Unknown unit '" + unit + "'. 
Known units are " + KNOWN_UNITS); + } + } + + /** + * Converts a value from one unit to another. Supported units can be obtained + * by inspecting the KNOWN_UNITS set. + * + * @param fromUnit the unit of the from value + * @param toUnit the target unit + * @param fromValue the value you wish to convert + * @return the value in toUnit + */ + public static long convert(String fromUnit, String toUnit, long fromValue) { + if (toUnit == null || fromUnit == null) { + throw new IllegalArgumentException("One or more arguments are null"); + } + + if (fromUnit.equals(toUnit)) { + return fromValue; + } + Converter fc = getConverter(fromUnit); + Converter tc = getConverter(toUnit); + long numerator = fc.numerator * tc.denominator; + long denominator = fc.denominator * tc.numerator; + long numeratorMultiplierLimit = Long.MAX_VALUE / numerator; + if (numerator < denominator) { + if (numeratorMultiplierLimit < fromValue) { + String overflowMsg = + "Converting " + fromValue + " from '" + fromUnit + "' to '" + toUnit + + "' will result in an overflow of Long"; + throw new IllegalArgumentException(overflowMsg); + } + return (fromValue * numerator) / denominator; + } + if (numeratorMultiplierLimit > fromValue) { + return (numerator * fromValue) / denominator; + } + long tmp = numerator / denominator; + if ((Long.MAX_VALUE / tmp) < fromValue) { + String overflowMsg = + "Converting " + fromValue + " from '" + fromUnit + "' to '" + toUnit + + "' will result in an overflow of Long"; + throw new IllegalArgumentException(overflowMsg); + } + return fromValue * tmp; + } + + /** + * Compare a value in a given unit with a value in another unit. The return + * value is equivalent to the value returned by compareTo. + * + * @param unitA first unit + * @param valueA first value + * @param unitB second unit + * @param valueB second value + * @return +1, 0 or -1 depending on whether the relationship is greater than, + * equal to or lesser than + */ + public static int compare(String unitA, long valueA, String unitB, + long valueB) { + if (unitA == null || unitB == null || !KNOWN_UNITS.contains(unitA) + || !KNOWN_UNITS.contains(unitB)) { + throw new IllegalArgumentException("Units cannot be null"); + } + if (!KNOWN_UNITS.contains(unitA)) { + throw new IllegalArgumentException("Unknown unit '" + unitA + "'"); + } + if (!KNOWN_UNITS.contains(unitB)) { + throw new IllegalArgumentException("Unknown unit '" + unitB + "'"); + } + if (unitA.equals(unitB)) { + return Long.compare(valueA, valueB); + } + Converter unitAC = getConverter(unitA); + Converter unitBC = getConverter(unitB); + int unitAPos = SORTED_UNITS.indexOf(unitA); + int unitBPos = SORTED_UNITS.indexOf(unitB); + try { + long tmpA = valueA; + long tmpB = valueB; + if (unitAPos < unitBPos) { + tmpB = convert(unitB, unitA, valueB); + } else { + tmpA = convert(unitA, unitB, valueA); + } + return Long.compare(tmpA, tmpB); + } catch (IllegalArgumentException ie) { + BigInteger tmpA = BigInteger.valueOf(valueA); + BigInteger tmpB = BigInteger.valueOf(valueB); + if (unitAPos < unitBPos) { + tmpB = tmpB.multiply(BigInteger.valueOf(unitBC.numerator)); + tmpB = tmpB.multiply(BigInteger.valueOf(unitAC.denominator)); + tmpB = tmpB.divide(BigInteger.valueOf(unitBC.denominator)); + tmpB = tmpB.divide(BigInteger.valueOf(unitAC.numerator)); + } else { + tmpA = tmpA.multiply(BigInteger.valueOf(unitAC.numerator)); + tmpA = tmpA.multiply(BigInteger.valueOf(unitBC.denominator)); + tmpA = tmpA.divide(BigInteger.valueOf(unitAC.denominator)); + tmpA = 
tmpA.divide(BigInteger.valueOf(unitBC.numerator)); + } + return tmpA.compareTo(tmpB); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java new file mode 100644 index 00000000000..0564d749bbc --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java @@ -0,0 +1,576 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util.resource; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.conf.ConfigurationProvider; +import org.apache.hadoop.yarn.conf.ConfigurationProviderFactory; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Helper class to read the resource-types to be supported by the system. 
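+ * <p>
+ * An illustrative lookup only (a sketch, not part of this patch; it assumes
+ * the resource types have already been loaded from configuration):
+ * <pre>{@code
+ *   Map<String, ResourceInformation> types = ResourceUtils.getResourceTypes();
+ *   ResourceInformation memory =
+ *       types.get(ResourceInformation.MEMORY_MB.getName());
+ *   String units = memory.getUnits(); // "Mi" for memory-mb
+ * }</pre>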
+ */ +public class ResourceUtils { + + public static final String UNITS = ".units"; + public static final String TYPE = ".type"; + public static final String MINIMUM_ALLOCATION = ".minimum-allocation"; + public static final String MAXIMUM_ALLOCATION = ".maximum-allocation"; + + private static final String MEMORY = ResourceInformation.MEMORY_MB.getName(); + private static final String VCORES = ResourceInformation.VCORES.getName(); + + private static volatile boolean initializedResources = false; + private static final Map RESOURCE_NAME_TO_INDEX = + new ConcurrentHashMap(); + private static volatile Map resourceTypes; + private static volatile ResourceInformation[] resourceTypesArray; + private static volatile boolean initializedNodeResources = false; + private static volatile Map readOnlyNodeResources; + private static volatile int numKnownResourceTypes = -1; + + static final Log LOG = LogFactory.getLog(ResourceUtils.class); + + private ResourceUtils() { + } + + private static void checkMandatoryResources( + Map resourceInformationMap) + throws YarnRuntimeException { + /* + * Supporting 'memory' also as invalid resource name, in addition to + * 'MEMORY' for historical reasons + */ + String key = "memory"; + if (resourceInformationMap.containsKey(key)) { + LOG.warn("Attempt to define resource '" + key + + "', but it is not allowed."); + throw new YarnRuntimeException("Attempt to re-define mandatory resource '" + + key + "'."); + } + + if (resourceInformationMap.containsKey(MEMORY)) { + ResourceInformation memInfo = resourceInformationMap.get(MEMORY); + String memUnits = ResourceInformation.MEMORY_MB.getUnits(); + ResourceTypes memType = ResourceInformation.MEMORY_MB.getResourceType(); + if (!memInfo.getUnits().equals(memUnits) || !memInfo.getResourceType() + .equals(memType)) { + throw new YarnRuntimeException( + "Attempt to re-define mandatory resource 'memory-mb'. It can only" + + " be of type 'COUNTABLE' and have units 'Mi'."); + } + } + + if (resourceInformationMap.containsKey(VCORES)) { + ResourceInformation vcoreInfo = resourceInformationMap.get(VCORES); + String vcoreUnits = ResourceInformation.VCORES.getUnits(); + ResourceTypes vcoreType = ResourceInformation.VCORES.getResourceType(); + if (!vcoreInfo.getUnits().equals(vcoreUnits) || !vcoreInfo + .getResourceType().equals(vcoreType)) { + throw new YarnRuntimeException( + "Attempt to re-define mandatory resource 'vcores'. 
It can only be" + + " of type 'COUNTABLE' and have units ''(no units)."); + } + } + } + + private static void addMandatoryResources( + Map res) { + ResourceInformation ri; + if (!res.containsKey(MEMORY)) { + LOG.info("Adding resource type - name = " + MEMORY + ", units = " + + ResourceInformation.MEMORY_MB.getUnits() + ", type = " + + ResourceTypes.COUNTABLE); + ri = ResourceInformation + .newInstance(MEMORY, + ResourceInformation.MEMORY_MB.getUnits()); + res.put(MEMORY, ri); + } + if (!res.containsKey(VCORES)) { + LOG.info("Adding resource type - name = " + VCORES + ", units = , type = " + + ResourceTypes.COUNTABLE); + ri = + ResourceInformation.newInstance(VCORES); + res.put(VCORES, ri); + } + } + + private static void setMinimumAllocationForMandatoryResources( + Map res, Configuration conf) { + String[][] resourceTypesKeys = { + {ResourceInformation.MEMORY_MB.getName(), + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + String.valueOf( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB), + ResourceInformation.MEMORY_MB.getName()}, + {ResourceInformation.VCORES.getName(), + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + String.valueOf( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES), + ResourceInformation.VCORES.getName()}}; + for (String[] arr : resourceTypesKeys) { + String resourceTypesKey = + YarnConfiguration.RESOURCE_TYPES + "." + arr[0] + MINIMUM_ALLOCATION; + long minimumResourceTypes = conf.getLong(resourceTypesKey, -1); + long minimumConf = conf.getLong(arr[1], -1); + long minimum; + if (minimumResourceTypes != -1) { + minimum = minimumResourceTypes; + if (minimumConf != -1) { + LOG.warn("Using minimum allocation for memory specified in " + + "resource-types config file with key " + + minimumResourceTypes + ", ignoring minimum specified using " + + arr[1]); + } + } else { + minimum = conf.getLong(arr[1], Long.parseLong(arr[2])); + } + ResourceInformation ri = res.get(arr[3]); + ri.setMinimumAllocation(minimum); + } + } + + private static void setMaximumAllocationForMandatoryResources( + Map res, Configuration conf) { + String[][] resourceTypesKeys = { + {ResourceInformation.MEMORY_MB.getName(), + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, + String.valueOf( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB), + ResourceInformation.MEMORY_MB.getName()}, + {ResourceInformation.VCORES.getName(), + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, + String.valueOf( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES), + ResourceInformation.VCORES.getName()}}; + for (String[] arr : resourceTypesKeys) { + String resourceTypesKey = + YarnConfiguration.RESOURCE_TYPES + "." 
+ arr[0] + MAXIMUM_ALLOCATION; + long maximumResourceTypes = conf.getLong(resourceTypesKey, -1); + long maximumConf = conf.getLong(arr[1], -1); + long maximum; + if (maximumResourceTypes != -1) { + maximum = maximumResourceTypes; + if (maximumConf != -1) { + LOG.warn("Using maximum allocation for memory specified in " + + "resource-types config file with key " + + maximumResourceTypes + ", ignoring maximum specified using " + + arr[1]); + } + } else { + maximum = conf.getLong(arr[1], Long.parseLong(arr[2])); + } + ResourceInformation ri = res.get(arr[3]); + ri.setMaximumAllocation(maximum); + } + } + + @VisibleForTesting + static void initializeResourcesMap(Configuration conf) { + + Map resourceInformationMap = new HashMap<>(); + String[] resourceNames = conf.getStrings(YarnConfiguration.RESOURCE_TYPES); + + if (resourceNames != null && resourceNames.length != 0) { + for (String resourceName : resourceNames) { + String resourceUnits = conf.get( + YarnConfiguration.RESOURCE_TYPES + "." + resourceName + UNITS, ""); + String resourceTypeName = conf.get( + YarnConfiguration.RESOURCE_TYPES + "." + resourceName + TYPE, + ResourceTypes.COUNTABLE.toString()); + Long minimumAllocation = conf.getLong( + YarnConfiguration.RESOURCE_TYPES + "." + resourceName + + MINIMUM_ALLOCATION, 0L); + Long maximumAllocation = conf.getLong( + YarnConfiguration.RESOURCE_TYPES + "." + resourceName + + MAXIMUM_ALLOCATION, Long.MAX_VALUE); + if (resourceName == null || resourceName.isEmpty() + || resourceUnits == null || resourceTypeName == null) { + throw new YarnRuntimeException( + "Incomplete configuration for resource type '" + resourceName + + "'. One of name, units or type is configured incorrectly."); + } + ResourceTypes resourceType = ResourceTypes.valueOf(resourceTypeName); + LOG.info("Adding resource type - name = " + resourceName + ", units = " + + resourceUnits + ", type = " + resourceTypeName); + if (resourceInformationMap.containsKey(resourceName)) { + throw new YarnRuntimeException( + "Error in config, key '" + resourceName + "' specified twice"); + } + resourceInformationMap.put(resourceName, ResourceInformation + .newInstance(resourceName, resourceUnits, 0L, resourceType, + minimumAllocation, maximumAllocation)); + } + } + + checkMandatoryResources(resourceInformationMap); + addMandatoryResources(resourceInformationMap); + + setMinimumAllocationForMandatoryResources(resourceInformationMap, conf); + setMaximumAllocationForMandatoryResources(resourceInformationMap, conf); + + initializeResourcesFromResourceInformationMap(resourceInformationMap); + } + + /** + * This method is visible for testing, unit test can construct a + * resourceInformationMap and pass it to this method to initialize multiple resources. + * @param resourceInformationMap constructed resource information map. + */ + @VisibleForTesting + public static void initializeResourcesFromResourceInformationMap( + Map resourceInformationMap) { + resourceTypes = Collections.unmodifiableMap(resourceInformationMap); + updateKnownResources(); + updateResourceTypeIndex(); + initializedResources = true; + } + + private static void updateKnownResources() { + // Update resource names. 
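+    // Fixed slots: memory-mb is always stored at index 0 and vcores at
+    // index 1; every other configured resource type follows from index 2 in
+    // the iteration order of the resource types map.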
+ resourceTypesArray = new ResourceInformation[resourceTypes.size()]; + + int index = 2; + for (ResourceInformation resInfo : resourceTypes.values()) { + if (resInfo.getName().equals(MEMORY)) { + resourceTypesArray[0] = ResourceInformation + .newInstance(resourceTypes.get(MEMORY)); + } else if (resInfo.getName().equals(VCORES)) { + resourceTypesArray[1] = ResourceInformation + .newInstance(resourceTypes.get(VCORES)); + } else { + resourceTypesArray[index] = ResourceInformation.newInstance(resInfo); + index++; + } + } + } + + private static void updateResourceTypeIndex() { + RESOURCE_NAME_TO_INDEX.clear(); + + for (int index = 0; index < resourceTypesArray.length; index++) { + ResourceInformation resInfo = resourceTypesArray[index]; + RESOURCE_NAME_TO_INDEX.put(resInfo.getName(), index); + } + } + + /** + * Get associate index of resource types such memory, cpu etc. + * This could help to access each resource types in a resource faster. + * @return Index map for all Resource Types. + */ + public static Map getResourceTypeIndex() { + return RESOURCE_NAME_TO_INDEX; + } + + /** + * Get the resource types to be supported by the system. + * @return A map of the resource name to a ResouceInformation object + * which contains details such as the unit. + */ + public static Map getResourceTypes() { + return getResourceTypes(null, + YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE); + } + + public static ResourceInformation[] getResourceTypesArray() { + initializeResourceTypesIfNeeded(null, + YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE); + return resourceTypesArray; + } + + public static int getNumberOfKnownResourceTypes() { + if (numKnownResourceTypes < 0) { + initializeResourceTypesIfNeeded(null, + YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE); + } + return numKnownResourceTypes; + } + + private static Map getResourceTypes( + Configuration conf) { + return getResourceTypes(conf, + YarnConfiguration.RESOURCE_TYPES_CONFIGURATION_FILE); + } + + private static void initializeResourceTypesIfNeeded(Configuration conf, + String resourceFile) { + if (!initializedResources) { + synchronized (ResourceUtils.class) { + if (!initializedResources) { + if (conf == null) { + conf = new YarnConfiguration(); + } + try { + addResourcesFileToConf(resourceFile, conf); + LOG.debug("Found " + resourceFile + ", adding to configuration"); + } catch (FileNotFoundException fe) { + LOG.debug("Unable to find '" + resourceFile + "'."); + } + + initializeResourcesMap(conf); + } + } + } + numKnownResourceTypes = resourceTypes.size(); + } + + private static Map getResourceTypes( + Configuration conf, String resourceFile) { + initializeResourceTypesIfNeeded(conf, resourceFile); + return resourceTypes; + } + + private static InputStream getConfInputStream(String resourceFile, + Configuration conf) throws IOException, YarnException { + + ConfigurationProvider provider = + ConfigurationProviderFactory.getConfigurationProvider(conf); + try { + provider.init(conf); + } catch (Exception e) { + throw new IOException(e); + } + + InputStream ris = provider.getConfigurationInputStream(conf, resourceFile); + if (ris == null) { + if (conf.getResource(resourceFile) == null) { + throw new FileNotFoundException("Unable to find " + resourceFile); + } + throw new IOException( + "Unable to open resource types file '" + resourceFile + + "'. 
Using provider " + provider); + } + return ris; + } + + private static void addResourcesFileToConf(String resourceFile, + Configuration conf) throws FileNotFoundException { + try { + InputStream ris = getConfInputStream(resourceFile, conf); + LOG.debug("Found " + resourceFile + ", adding to configuration"); + conf.addResource(ris); + } catch (FileNotFoundException fe) { + throw fe; + } catch (IOException ie) { + LOG.fatal("Exception trying to read resource types configuration '" + + resourceFile + "'.", ie); + throw new YarnRuntimeException(ie); + } catch (YarnException ye) { + LOG.fatal("YARN Exception trying to read resource types configuration '" + + resourceFile + "'.", ye); + throw new YarnRuntimeException(ye); + } + } + + @VisibleForTesting + synchronized static void resetResourceTypes() { + initializedResources = false; + } + + @VisibleForTesting + public static Map + resetResourceTypes(Configuration conf) { + synchronized (ResourceUtils.class) { + initializedResources = false; + } + return getResourceTypes(conf); + } + + public static String getUnits(String resourceValue) { + String units; + for (int i = 0; i < resourceValue.length(); i++) { + if (Character.isAlphabetic(resourceValue.charAt(i))) { + units = resourceValue.substring(i); + if (StringUtils.isAlpha(units)) { + return units; + } + } + } + return ""; + } + + /** + * Function to get the resources for a node. This function will look at the + * file {@link YarnConfiguration#NODE_RESOURCES_CONFIGURATION_FILE} to + * determine the node resources. + * + * @param conf configuration file + * @return a map to resource name to the ResourceInformation object. The map + * is guaranteed to have entries for memory and vcores + */ + public static Map getNodeResourceInformation( + Configuration conf) { + if (!initializedNodeResources) { + synchronized (ResourceUtils.class) { + if (!initializedNodeResources) { + Map nodeResources = initializeNodeResourceInformation( + conf); + addMandatoryResources(nodeResources); + checkMandatoryResources(nodeResources); + setMinimumAllocationForMandatoryResources(nodeResources, conf); + setMaximumAllocationForMandatoryResources(nodeResources, conf); + readOnlyNodeResources = Collections.unmodifiableMap(nodeResources); + initializedNodeResources = true; + } + } + } + return readOnlyNodeResources; + } + + private static Map initializeNodeResourceInformation( + Configuration conf) { + Map nodeResources = new HashMap<>(); + try { + addResourcesFileToConf( + YarnConfiguration.NODE_RESOURCES_CONFIGURATION_FILE, conf); + for (Map.Entry entry : conf) { + String key = entry.getKey(); + String value = entry.getValue(); + if (key.startsWith(YarnConfiguration.NM_RESOURCES_PREFIX)) { + addResourceInformation(key, value, nodeResources); + } + } + } catch (FileNotFoundException fe) { + LOG.info("Couldn't find node resources file"); + } + return nodeResources; + } + + private static void addResourceInformation(String prop, String value, + Map nodeResources) { + String[] parts = prop.split("\\."); + LOG.info("Found resource entry " + prop); + if (parts.length == 4) { + String resourceType = parts[3]; + if (!nodeResources.containsKey(resourceType)) { + nodeResources + .put(resourceType, ResourceInformation.newInstance(resourceType)); + } + String units = getUnits(value); + Long resourceValue = + Long.valueOf(value.substring(0, value.length() - units.length())); + nodeResources.get(resourceType).setValue(resourceValue); + nodeResources.get(resourceType).setUnits(units); + LOG.debug("Setting value for resource type " + 
resourceType + " to " + + resourceValue + " with units " + units); + } + } + + @VisibleForTesting + synchronized public static void resetNodeResources() { + initializedNodeResources = false; + } + + public static Resource getResourceTypesMinimumAllocation() { + Resource ret = Resource.newInstance(0, 0); + for (ResourceInformation entry : resourceTypesArray) { + String name = entry.getName(); + if (name.equals(ResourceInformation.MEMORY_MB.getName())) { + ret.setMemorySize(entry.getMinimumAllocation()); + } else if (name.equals(ResourceInformation.VCORES.getName())) { + Long tmp = entry.getMinimumAllocation(); + if (tmp > Integer.MAX_VALUE) { + tmp = (long) Integer.MAX_VALUE; + } + ret.setVirtualCores(tmp.intValue()); + } else { + ret.setResourceValue(name, entry.getMinimumAllocation()); + } + } + return ret; + } + + /** + * Get a Resource object with for the maximum allocation possible. + * @return a Resource object with the maximum allocation for the scheduler + */ + public static Resource getResourceTypesMaximumAllocation() { + Resource ret = Resource.newInstance(0, 0); + for (ResourceInformation entry : resourceTypesArray) { + String name = entry.getName(); + if (name.equals(ResourceInformation.MEMORY_MB.getName())) { + ret.setMemorySize(entry.getMaximumAllocation()); + } else if (name.equals(ResourceInformation.VCORES.getName())) { + Long tmp = entry.getMaximumAllocation(); + if (tmp > Integer.MAX_VALUE) { + tmp = (long) Integer.MAX_VALUE; + } + ret.setVirtualCores(tmp.intValue()); + continue; + } else { + ret.setResourceValue(name, entry.getMaximumAllocation()); + } + } + return ret; + } + + /** + * Get default unit by given resource type. + * @param resourceType resourceType + * @return default unit + */ + public static String getDefaultUnit(String resourceType) { + ResourceInformation ri = getResourceTypes().get(resourceType); + if (ri != null) { + return ri.getUnits(); + } + return ""; + } + + /** + * Get all resource types information from known resource types. + * @return List of ResourceTypeInfo + */ + public static List getResourcesTypeInfo() { + List array = new ArrayList<>(); + // Add all resource types + Collection resourcesInfo = + ResourceUtils.getResourceTypes().values(); + for (ResourceInformation resourceInfo : resourcesInfo) { + array.add(ResourceTypeInfo + .newInstance(resourceInfo.getName(), resourceInfo.getUnits(), + resourceInfo.getResourceType())); + } + return array; + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestInMemoryNativeS3FileSystemContract.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java similarity index 67% rename from hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestInMemoryNativeS3FileSystemContract.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java index adbf95074e8..d7c799d7cbf 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/ITestInMemoryNativeS3FileSystemContract.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/util/resource/package-info.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -15,19 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.hadoop.fs.s3native; - -import java.io.IOException; - /** - * S3N basic contract tests through mock in-memory S3 implementation. + * Package org.apache.hadoop.yarn.util.resource contains classes + * which is used as utility class for resource profile computations. */ -public class ITestInMemoryNativeS3FileSystemContract - extends NativeS3FileSystemContractBaseTest { - - @Override - NativeFileSystemStore getNativeFileSystemStore() throws IOException { - return new InMemoryNativeFileSystemStore(); - } -} +package org.apache.hadoop.yarn.util.resource; \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto index ba79db09a6f..81adef19335 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/applicationclient_protocol.proto @@ -61,4 +61,7 @@ service ApplicationClientProtocolService { rpc updateApplicationPriority (UpdateApplicationPriorityRequestProto) returns (UpdateApplicationPriorityResponseProto); rpc signalToContainer(SignalContainerRequestProto) returns (SignalContainerResponseProto); rpc updateApplicationTimeouts (UpdateApplicationTimeoutsRequestProto) returns (UpdateApplicationTimeoutsResponseProto); + rpc getResourceProfiles(GetAllResourceProfilesRequestProto) returns (GetAllResourceProfilesResponseProto); + rpc getResourceProfile(GetResourceProfileRequestProto) returns (GetResourceProfileResponseProto); + rpc getResourceTypeInfo(GetAllResourceTypeInfoRequestProto) returns (GetAllResourceTypeInfoResponseProto); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index c5f485fc3f7..066441cc3b7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -53,9 +53,27 @@ message ContainerIdProto { optional int64 id = 3; } +enum ResourceTypesProto { + COUNTABLE = 0; +} + +message ResourceInformationProto { + required string key = 1; + optional int64 value = 2; + optional string units = 3; + optional ResourceTypesProto type = 4; +} + +message ResourceTypeInfoProto { + required string name = 1; + optional string units = 2; + optional ResourceTypesProto type = 3; +} + message ResourceProto { optional int64 memory = 1; optional int32 virtual_cores = 2; + repeated ResourceInformationProto resource_value_map = 3; } message ResourceUtilizationProto { @@ -69,6 +87,15 @@ message ResourceOptionProto { optional int32 over_commit_timeout = 2; } +message ResourceProfileEntry { + required string name = 1; + required ResourceProto resources = 2; +} + +message ResourceProfilesProto { + repeated ResourceProfileEntry resource_profiles_map = 1; +} + message NodeResourceMapProto { optional NodeIdProto node_id = 1; optional ResourceOptionProto resource_option = 2; @@ -83,6 +110,7 @@ enum ContainerStateProto { C_RUNNING = 2; C_COMPLETE = 3; C_SCHEDULED = 4; + C_PAUSED = 5; } message ContainerProto { @@ -174,6 +202,11 @@ message LocalResourceProto { 
optional bool should_be_uploaded_to_shared_cache = 7; } +message StringLongMapProto { + required string key = 1; + required int64 value = 2; +} + message ApplicationResourceUsageReportProto { optional int32 num_used_containers = 1; optional int32 num_reserved_containers = 2; @@ -186,6 +219,8 @@ message ApplicationResourceUsageReportProto { optional float cluster_usage_percentage = 9; optional int64 preempted_memory_seconds = 10; optional int64 preempted_vcore_seconds = 11; + repeated StringLongMapProto application_resource_usage_map = 12; + repeated StringLongMapProto application_preempted_resource_usage_map = 13; } message ApplicationReportProto { @@ -310,6 +345,11 @@ enum ExecutionTypeProto { //////////////////////////////////////////////////////////////////////// ////// From AM_RM_Protocol ///////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// +message ProfileCapabilityProto { + required string profile = 1; + required ResourceProto profileCapabilityOverride = 2; +} + message ResourceRequestProto { optional PriorityProto priority = 1; optional string resource_name = 2; @@ -319,6 +359,7 @@ message ResourceRequestProto { optional string node_label_expression = 6; optional ExecutionTypeRequestProto execution_type_request = 7; optional int64 allocation_request_id = 8 [default = 0]; + optional ProfileCapabilityProto profile = 9; } message ExecutionTypeRequestProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 7a7f03503ca..3da4ee7298a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -48,6 +48,7 @@ message RegisterApplicationMasterResponseProto { optional string queue = 5; repeated NMTokenProto nm_tokens_from_previous_attempts = 6; repeated SchedulerResourceTypes scheduler_resource_types = 7; + optional ResourceProfilesProto resource_profiles = 8; } message FinishApplicationMasterRequestProto { @@ -279,6 +280,28 @@ message UpdateApplicationTimeoutsResponseProto { repeated ApplicationUpdateTimeoutMapProto application_timeouts = 1; } +message GetAllResourceProfilesRequestProto { +} + +message GetAllResourceProfilesResponseProto { + required ResourceProfilesProto resource_profiles = 1; +} + +message GetResourceProfileRequestProto { + required string profile = 1; +} + +message GetResourceProfileResponseProto { + required ResourceProto resources = 1; +} + +message GetAllResourceTypeInfoRequestProto { +} + +message GetAllResourceTypeInfoResponseProto { + repeated ResourceTypeInfoProto resource_type_info = 1; +} + ////////////////////////////////////////////////////// /////// client_NM_Protocol /////////////////////////// ////////////////////////////////////////////////////// diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java new file mode 100644 index 00000000000..66bf3204bf6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestResourceInformation.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.conf; + +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class to verify various resource informations in a given resource. + */ +public class TestResourceInformation { + + @Test + public void testName() { + String name = "yarn.io/test"; + ResourceInformation ri = ResourceInformation.newInstance(name); + Assert.assertEquals("Resource name incorrect", name, ri.getName()); + } + + @Test + public void testUnits() { + String name = "yarn.io/test"; + String units = "m"; + ResourceInformation ri = ResourceInformation.newInstance(name, units); + Assert.assertEquals("Resource name incorrect", name, ri.getName()); + Assert.assertEquals("Resource units incorrect", units, ri.getUnits()); + units = "z"; + try { + ResourceInformation.newInstance(name, units); + Assert.fail(units + "is not a valid unit"); + } catch (IllegalArgumentException ie) { + // do nothing + } + } + + @Test + public void testValue() { + String name = "yarn.io/test"; + long value = 1L; + ResourceInformation ri = ResourceInformation.newInstance(name, value); + Assert.assertEquals("Resource name incorrect", name, ri.getName()); + Assert.assertEquals("Resource value incorrect", value, ri.getValue()); + } + + @Test + public void testResourceInformation() { + String name = "yarn.io/test"; + long value = 1L; + String units = "m"; + ResourceInformation ri = + ResourceInformation.newInstance(name, units, value); + Assert.assertEquals("Resource name incorrect", name, ri.getName()); + Assert.assertEquals("Resource value incorrect", value, ri.getValue()); + Assert.assertEquals("Resource units incorrect", units, ri.getUnits()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java index 1d3111ce8b0..b8fbca66936 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java @@ -143,6 +143,10 @@ public void initializeMemberVariables() { // Used as Java command line properties, not XML configurationPrefixToSkipCompare.add("yarn.app.container"); + // Ignore default file name for resource profiles + configurationPropsToSkipCompare + .add(YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_SOURCE_FILE); + // Ignore NodeManager "work in progress" variables configurationPrefixToSkipCompare .add(YarnConfiguration.NM_NETWORK_RESOURCE_ENABLED); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/util/TestUnitsConversionUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/util/TestUnitsConversionUtil.java new file mode 100644 index 00000000000..a412faebed8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/util/TestUnitsConversionUtil.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class to handle all test cases needed to verify basic unit conversion + * scenarios. + */ +public class TestUnitsConversionUtil { + + @Test + public void testUnitsConversion() { + int value = 5; + String fromUnit = ""; + long test = value; + Assert.assertEquals("pico test failed", + value * 1000L * 1000L * 1000L * 1000L, + UnitsConversionUtil.convert(fromUnit, "p", test)); + Assert.assertEquals("nano test failed", + value * 1000L * 1000L * 1000L, + UnitsConversionUtil.convert(fromUnit, "n", test)); + Assert + .assertEquals("micro test failed", value * 1000L * 1000L, + UnitsConversionUtil.convert(fromUnit, "u", test)); + Assert.assertEquals("milli test failed", value * 1000L, + UnitsConversionUtil.convert(fromUnit, "m", test)); + + test = value * 1000L * 1000L * 1000L * 1000L * 1000L; + fromUnit = ""; + Assert.assertEquals("kilo test failed", test / 1000L, + UnitsConversionUtil.convert(fromUnit, "k", test)); + + Assert + .assertEquals("mega test failed", test / (1000L * 1000L), + UnitsConversionUtil.convert(fromUnit, "M", test)); + Assert.assertEquals("giga test failed", + test / (1000L * 1000L * 1000L), + UnitsConversionUtil.convert(fromUnit, "G", test)); + Assert.assertEquals("tera test failed", + test / (1000L * 1000L * 1000L * 1000L), + UnitsConversionUtil.convert(fromUnit, "T", test)); + Assert.assertEquals("peta test failed", + test / (1000L * 1000L * 1000L * 1000L * 1000L), + UnitsConversionUtil.convert(fromUnit, "P", test)); + + Assert.assertEquals("nano to pico test failed", value * 1000L, + UnitsConversionUtil.convert("n", "p", value)); + + Assert.assertEquals("mega to giga test failed", value, + UnitsConversionUtil.convert("M", "G", value * 1000L)); + + Assert.assertEquals("Mi to Gi test failed", value, + UnitsConversionUtil.convert("Mi", "Gi", value * 1024L)); + + Assert.assertEquals("Mi to Ki test failed", value * 1024, + UnitsConversionUtil.convert("Mi", "Ki", value)); + + Assert.assertEquals("Ki to base units test failed", 5 * 1024, + UnitsConversionUtil.convert("Ki", "", 5)); + + Assert.assertEquals("Mi to k test failed", 1073741, + UnitsConversionUtil.convert("Mi", "k", 1024)); + + Assert.assertEquals("M to Mi test failed", 
953, + UnitsConversionUtil.convert("M", "Mi", 1000)); + } + + @Test + public void testOverflow() { + long test = 5 * 1000L * 1000L * 1000L * 1000L * 1000L; + try { + UnitsConversionUtil.convert("P", "p", test); + Assert.fail("this operation should result in an overflow"); + } catch (IllegalArgumentException ie) { + // do nothing + } + try { + UnitsConversionUtil.convert("m", "p", Long.MAX_VALUE - 1); + Assert.fail("this operation should result in an overflow"); + } catch (IllegalArgumentException ie) { + // do nothing + } + } + + @Test + public void testCompare() { + String unitA = "P"; + long valueA = 1; + String unitB = "p"; + long valueB = 2; + Assert.assertEquals(1, + UnitsConversionUtil.compare(unitA, valueA, unitB, valueB)); + Assert.assertEquals(-1, + UnitsConversionUtil.compare(unitB, valueB, unitA, valueA)); + Assert.assertEquals(0, + UnitsConversionUtil.compare(unitA, valueA, unitA, valueA)); + Assert.assertEquals(-1, + UnitsConversionUtil.compare(unitA, valueA, unitA, valueB)); + Assert.assertEquals(1, + UnitsConversionUtil.compare(unitA, valueB, unitA, valueA)); + + unitB = "T"; + Assert.assertEquals(1, + UnitsConversionUtil.compare(unitA, valueA, unitB, valueB)); + Assert.assertEquals(-1, + UnitsConversionUtil.compare(unitB, valueB, unitA, valueA)); + Assert.assertEquals(0, + UnitsConversionUtil.compare(unitA, valueA, unitB, 1000L)); + + unitA = "p"; + unitB = "n"; + Assert.assertEquals(-1, + UnitsConversionUtil.compare(unitA, valueA, unitB, valueB)); + Assert.assertEquals(1, + UnitsConversionUtil.compare(unitB, valueB, unitA, valueA)); + Assert.assertEquals(0, + UnitsConversionUtil.compare(unitA, 1000L, unitB, valueA)); + + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 5ec9409096e..6d838c0bdc0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -87,6 +87,7 @@ import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.URL; @@ -103,6 +104,7 @@ import org.apache.hadoop.yarn.client.api.async.impl.NMClientAsyncImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.util.SystemClock; import org.apache.hadoop.yarn.util.TimelineServiceHelper; @@ -231,12 +233,18 @@ public enum DSEntity { @VisibleForTesting protected int numTotalContainers = 1; // Memory to request for the container on which the shell command will run - private long containerMemory = 10; + 
private static final long DEFAULT_CONTAINER_MEMORY = 10; + private long containerMemory = DEFAULT_CONTAINER_MEMORY; // VirtualCores to request for the container on which the shell command will run - private int containerVirtualCores = 1; + private static final int DEFAULT_CONTAINER_VCORES = 1; + private int containerVirtualCores = DEFAULT_CONTAINER_VCORES; // Priority of the request private int requestPriority; + // Resource profile for the container + private String containerResourceProfile = ""; + Map resourceProfiles; + // Counter for completed containers ( complete denotes successful or failed ) private AtomicInteger numCompletedContainers = new AtomicInteger(); // Allocated container count so that we know how many containers has the RM @@ -407,6 +415,8 @@ public boolean init(String[] args) throws ParseException, IOException { "Amount of memory in MB to be requested to run the shell command"); opts.addOption("container_vcores", true, "Amount of virtual cores to be requested to run the shell command"); + opts.addOption("container_resource_profile", true, + "Resource profile to be requested to run the shell command"); opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); opts.addOption("priority", true, "Application Priority. Default 0"); @@ -548,9 +558,11 @@ public boolean init(String[] args) throws ParseException, IOException { } containerMemory = Integer.parseInt(cliParser.getOptionValue( - "container_memory", "10")); + "container_memory", "-1")); containerVirtualCores = Integer.parseInt(cliParser.getOptionValue( - "container_vcores", "1")); + "container_vcores", "-1")); + containerResourceProfile = + cliParser.getOptionValue("container_resource_profile", ""); numTotalContainers = Integer.parseInt(cliParser.getOptionValue( "num_containers", "1")); if (numTotalContainers == 0) { @@ -669,6 +681,7 @@ public void run() throws YarnException, IOException, InterruptedException { RegisterApplicationMasterResponse response = amRMClient .registerApplicationMaster(appMasterHostname, appMasterRpcPort, appMasterTrackingUrl); + resourceProfiles = response.getResourceProfiles(); // Dump out information about cluster capability as seen by the // resource manager long maxMem = response.getMaximumResourceCapability().getMemorySize(); @@ -1227,12 +1240,8 @@ private ContainerRequest setupContainerAskForRM() { Priority pri = Priority.newInstance(requestPriority); // Set up resource type requirements - // For now, memory and CPU are supported so we set memory and cpu requirements - Resource capability = Resource.newInstance(containerMemory, - containerVirtualCores); - - ContainerRequest request = new ContainerRequest(capability, null, null, - pri); + ContainerRequest request = + new ContainerRequest(createProfileCapability(), null, null, pri); LOG.info("Requested container ask: " + request.toString()); return request; } @@ -1496,4 +1505,36 @@ public TimelinePutResponse run() throws Exception { } } + private ProfileCapability createProfileCapability() + throws YarnRuntimeException { + if (containerMemory < -1 || containerMemory == 0) { + throw new YarnRuntimeException("Value of AM memory '" + containerMemory + + "' has to be greater than 0"); + } + if (containerVirtualCores < -1 || containerVirtualCores == 0) { + throw new YarnRuntimeException( + "Value of AM vcores '" + containerVirtualCores + + "' has to be greater than 0"); + } + + Resource resourceCapability = + Resource.newInstance(containerMemory, containerVirtualCores); + if (resourceProfiles == 
null) { + containerMemory = containerMemory == -1 ? DEFAULT_CONTAINER_MEMORY : + containerMemory; + containerVirtualCores = + containerVirtualCores == -1 ? DEFAULT_CONTAINER_VCORES : + containerVirtualCores; + resourceCapability.setMemorySize(containerMemory); + resourceCapability.setVirtualCores(containerVirtualCores); + } + + String profileName = containerResourceProfile; + if ("".equals(containerResourceProfile) && resourceProfiles != null) { + profileName = "default"; + } + ProfileCapability capability = + ProfileCapability.newInstance(profileName, resourceCapability); + return capability; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index eedb5016e4f..1a973049fc7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -66,10 +66,12 @@ import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; @@ -79,8 +81,9 @@ import org.apache.hadoop.yarn.client.api.YarnClientApplication; import org.apache.hadoop.yarn.client.util.YarnClientUtils; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException; import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.util.ConverterUtils; +import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.util.timeline.TimelineUtils; /** @@ -119,6 +122,11 @@ public class Client { private static final Log LOG = LogFactory.getLog(Client.class); + + private static final int DEFAULT_AM_MEMORY = 100; + private static final int DEFAULT_AM_VCORES = 1; + private static final int DEFAULT_CONTAINER_MEMORY = 10; + private static final int DEFAULT_CONTAINER_VCORES = 1; // Configuration private Configuration conf; @@ -130,9 +138,12 @@ public class Client { // Queue for App master private String amQueue = ""; // Amt. of memory resource to request for to run the App Master - private long amMemory = 100; + private long amMemory = DEFAULT_AM_MEMORY; // Amt. 
of virtual core resource to request for to run the App Master - private int amVCores = 1; + private int amVCores = DEFAULT_AM_VCORES; + + // AM resource profile + private String amResourceProfile = ""; // Application master jar file private String appMasterJar = ""; @@ -151,9 +162,11 @@ public class Client { private int shellCmdPriority = 0; // Amt of memory to request for container in which shell script will be executed - private int containerMemory = 10; + private long containerMemory = DEFAULT_CONTAINER_MEMORY; // Amt. of virtual cores to request for container in which shell script will be executed - private int containerVirtualCores = 1; + private int containerVirtualCores = DEFAULT_CONTAINER_VCORES; + // container resource profile + private String containerResourceProfile = ""; // No. of containers in which the shell script needs to be executed private int numContainers = 1; private String nodeLabelExpression = null; @@ -256,6 +269,7 @@ public Client(Configuration conf) throws Exception { opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master"); opts.addOption("master_vcores", true, "Amount of virtual cores to be requested to run the application master"); opts.addOption("jar", true, "Jar file containing the application master"); + opts.addOption("master_resource_profile", true, "Resource profile for the application master"); opts.addOption("shell_command", true, "Shell command to be executed by " + "the Application Master. Can only specify either --shell_command " + "or --shell_script"); @@ -269,6 +283,7 @@ public Client(Configuration conf) throws Exception { opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers"); opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command"); opts.addOption("container_vcores", true, "Amount of virtual cores to be requested to run the shell command"); + opts.addOption("container_resource_profile", true, "Resource profile for the shell command"); opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); opts.addOption("log_properties", true, "log4j.properties file"); opts.addOption("keep_containers_across_application_attempts", false, @@ -372,17 +387,11 @@ public boolean init(String[] args) throws ParseException { appName = cliParser.getOptionValue("appname", "DistributedShell"); amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0")); amQueue = cliParser.getOptionValue("queue", "default"); - amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "100")); - amVCores = Integer.parseInt(cliParser.getOptionValue("master_vcores", "1")); - - if (amMemory < 0) { - throw new IllegalArgumentException("Invalid memory specified for application master, exiting." - + " Specified memory=" + amMemory); - } - if (amVCores < 0) { - throw new IllegalArgumentException("Invalid virtual cores specified for application master, exiting." 
- + " Specified virtual cores=" + amVCores); - } + amMemory = + Integer.parseInt(cliParser.getOptionValue("master_memory", "-1")); + amVCores = + Integer.parseInt(cliParser.getOptionValue("master_vcores", "-1")); + amResourceProfile = cliParser.getOptionValue("master_resource_profile", ""); if (!cliParser.hasOption("jar")) { throw new IllegalArgumentException("No jar file specified for application master"); @@ -423,17 +432,18 @@ public boolean init(String[] args) throws ParseException { } shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0")); - containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10")); - containerVirtualCores = Integer.parseInt(cliParser.getOptionValue("container_vcores", "1")); - numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1")); - + containerMemory = + Integer.parseInt(cliParser.getOptionValue("container_memory", "-1")); + containerVirtualCores = + Integer.parseInt(cliParser.getOptionValue("container_vcores", "-1")); + containerResourceProfile = + cliParser.getOptionValue("container_resource_profile", ""); + numContainers = + Integer.parseInt(cliParser.getOptionValue("num_containers", "1")); - if (containerMemory < 0 || containerVirtualCores < 0 || numContainers < 1) { - throw new IllegalArgumentException("Invalid no. of containers or container memory/vcores specified," - + " exiting." - + " Specified containerMemory=" + containerMemory - + ", containerVirtualCores=" + containerVirtualCores - + ", numContainer=" + numContainers); + if (numContainers < 1) { + throw new IllegalArgumentException("Invalid no. of containers specified," + + " exiting. Specified numContainer=" + numContainers); } nodeLabelExpression = cliParser.getOptionValue("node_label_expression", null); @@ -540,6 +550,32 @@ public boolean run() throws IOException, YarnException { prepareTimelineDomain(); } + Map profiles; + try { + profiles = yarnClient.getResourceProfiles(); + } catch (YARNFeatureNotEnabledException re) { + profiles = null; + } + + List appProfiles = new ArrayList<>(2); + appProfiles.add(amResourceProfile); + appProfiles.add(containerResourceProfile); + for (String appProfile : appProfiles) { + if (appProfile != null && !appProfile.isEmpty()) { + if (profiles == null) { + String message = "Resource profiles is not enabled"; + LOG.error(message); + throw new IOException(message); + } + if (!profiles.containsKey(appProfile)) { + String message = "Unknown resource profile '" + appProfile + + "'. 
Valid resource profiles are " + profiles.keySet(); + LOG.error(message); + throw new IOException(message); + } + } + } + // Get a new application id YarnClientApplication app = yarnClient.createApplication(); GetNewApplicationResponse appResponse = app.getNewApplicationResponse(); @@ -573,6 +609,13 @@ public boolean run() throws IOException, YarnException { ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext(); ApplicationId appId = appContext.getApplicationId(); + // Set up resource type requirements + // For now, both memory and vcores are supported, so we set memory and + // vcores requirements + setAMResourceCapability(appContext, amMemory, amVCores, amResourceProfile, + amPriority, profiles); + setContainerResources(containerMemory, containerVirtualCores, profiles); + appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); @@ -696,8 +739,16 @@ public boolean run() throws IOException, YarnException { // Set class name vargs.add(appMasterMainClass); // Set params for Application Master - vargs.add("--container_memory " + String.valueOf(containerMemory)); - vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); + if (containerMemory > 0) { + vargs.add("--container_memory " + String.valueOf(containerMemory)); + } + if (containerVirtualCores > 0) { + vargs.add("--container_vcores " + String.valueOf(containerVirtualCores)); + } + if (containerResourceProfile != null && !containerResourceProfile + .isEmpty()) { + vargs.add("--container_resource_profile " + containerResourceProfile); + } vargs.add("--num_containers " + String.valueOf(numContainers)); if (null != nodeLabelExpression) { appContext.setNodeLabelExpression(nodeLabelExpression); @@ -730,12 +781,6 @@ public boolean run() throws IOException, YarnException { ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance( localResources, env, commands, null, null, null); - // Set up resource type requirements - // For now, both memory and vcores are supported, so we set memory and - // vcores requirements - Resource capability = Resource.newInstance(amMemory, amVCores); - appContext.setResource(capability); - // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); @@ -933,4 +978,65 @@ private void prepareTimelineDomain() { timelineClient.stop(); } } + + private void setAMResourceCapability(ApplicationSubmissionContext appContext, + long memory, int vcores, String profile, int priority, + Map profiles) throws IllegalArgumentException { + if (memory < -1 || memory == 0) { + throw new IllegalArgumentException("Invalid memory specified for" + + " application master, exiting. Specified memory=" + memory); + } + if (vcores < -1 || vcores == 0) { + throw new IllegalArgumentException("Invalid virtual cores specified for" + + " application master, exiting. 
Specified virtual cores=" + vcores); + } + String tmp = profile; + if (profile.isEmpty()) { + tmp = "default"; + } + if (appContext.getAMContainerResourceRequests() == null) { + List amResourceRequests = new ArrayList(); + amResourceRequests + .add(ResourceRequest.newInstance(Priority.newInstance(priority), "*", + Resources.clone(Resources.none()), 1)); + appContext.setAMContainerResourceRequests(amResourceRequests); + } + + if (appContext.getAMContainerResourceRequests().get(0) + .getProfileCapability() == null) { + appContext.getAMContainerResourceRequests().get(0).setProfileCapability( + ProfileCapability.newInstance(tmp, Resource.newInstance(0, 0))); + } + Resource capability = Resource.newInstance(0, 0); + // set amMemory because it's used to set Xmx param + if (profiles == null) { + amMemory = memory == -1 ? DEFAULT_AM_MEMORY : memory; + amVCores = vcores == -1 ? DEFAULT_AM_VCORES : vcores; + capability.setMemorySize(amMemory); + capability.setVirtualCores(amVCores); + } else { + amMemory = memory == -1 ? profiles.get(tmp).getMemorySize() : memory; + amVCores = vcores == -1 ? profiles.get(tmp).getVirtualCores() : vcores; + capability.setMemorySize(memory); + capability.setVirtualCores(vcores); + } + appContext.getAMContainerResourceRequests().get(0).getProfileCapability() + .setProfileCapabilityOverride(capability); + } + + private void setContainerResources(long memory, int vcores, + Map profiles) throws IllegalArgumentException { + if (memory < -1 || memory == 0) { + throw new IllegalArgumentException( + "Container memory '" + memory + "' has to be greated than 0"); + } + if (vcores < -1 || vcores == 0) { + throw new IllegalArgumentException( + "Container vcores '" + vcores + "' has to be greated than 0"); + } + if (profiles == null) { + containerMemory = memory == -1 ? DEFAULT_CONTAINER_MEMORY : memory; + containerVirtualCores = vcores == -1 ? DEFAULT_CONTAINER_VCORES : vcores; + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java index fc270cb79f2..b541cae9997 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java @@ -1122,6 +1122,7 @@ public void testDSShellWithInvalidArgs() throws Exception { "1" }; client.init(args); + client.run(); Assert.fail("Exception is expected"); } catch (IllegalArgumentException e) { Assert.assertTrue("The throw exception is not expected", @@ -1349,4 +1350,32 @@ private int verifyContainerLog(int containerNum, } return numOfWords; } + + @Test + public void testDistributedShellResourceProfiles() throws Exception { + String[][] args = { + {"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command", + Shell.WINDOWS ? "dir" : "ls", "--container_resource_profile", + "maximum" }, + {"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command", + Shell.WINDOWS ? 
"dir" : "ls", "--master_resource_profile", + "default" }, + {"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command", + Shell.WINDOWS ? "dir" : "ls", "--master_resource_profile", + "default", "--container_resource_profile", "maximum" } + }; + + for (int i = 0; i < args.length; ++i) { + LOG.info("Initializing DS Client"); + Client client = new Client(new Configuration(yarnCluster.getConfig())); + Assert.assertTrue(client.init(args[i])); + LOG.info("Running DS Client"); + try { + client.run(); + Assert.fail("Client run should throw error"); + } catch (Exception e) { + continue; + } + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml index 8413f153f53..4654000ded0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml @@ -144,6 +144,7 @@ src/test/resources/application_1440536969523_0001.har/part-0 src/test/resources/application_1440536969523_0001.har/_masterindex src/test/resources/application_1440536969523_0001.har/_SUCCESS + src/test/resources/resource-profiles.json diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java index b11c0944dc5..e86bd12cfc9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Collection; +import java.util.function.Supplier; import java.util.List; import org.apache.commons.logging.Log; @@ -38,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.UpdateContainerRequest; import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl; @@ -45,7 +47,6 @@ import org.apache.hadoop.yarn.util.resource.Resources; import com.google.common.base.Preconditions; -import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; @InterfaceAudience.Public @@ -117,7 +118,8 @@ public static class ContainerRequest { private String nodeLabelsExpression; private ExecutionTypeRequest executionTypeRequest = ExecutionTypeRequest.newInstance(); - + private String resourceProfile = ProfileCapability.DEFAULT_PROFILE; + /** * Instantiates a {@link ContainerRequest} with the given constraints and * locality relaxation enabled. @@ -163,6 +165,26 @@ public ContainerRequest(Resource capability, String[] nodes, this(capability, nodes, racks, priority, allocationRequestId, true, null, ExecutionTypeRequest.newInstance()); } + /** + * Instantiates a {@link ContainerRequest} with the given constraints and + * locality relaxation enabled. + * + * @param capability + * The {@link ProfileCapability} to be requested for each container. + * @param nodes + * Any hosts to request that the containers are placed on. + * @param racks + * Any racks to request that the containers are placed on. 
The + * racks corresponding to any hosts requested will be automatically + * added to this list. + * @param priority + * The priority at which to request the containers. Higher + * priorities have lower numerical values. + */ + public ContainerRequest(ProfileCapability capability, String[] nodes, + String[] racks, Priority priority) { + this(capability, nodes, racks, priority, 0, true, null); + } /** * Instantiates a {@link ContainerRequest} with the given constraints. @@ -187,6 +209,29 @@ public ContainerRequest(Resource capability, String[] nodes, this(capability, nodes, racks, priority, relaxLocality, null); } + /** + * Instantiates a {@link ContainerRequest} with the given constraints. + * + * @param capability + * The {@link ProfileCapability} to be requested for each container. + * @param nodes + * Any hosts to request that the containers are placed on. + * @param racks + * Any racks to request that the containers are placed on. The + * racks corresponding to any hosts requested will be automatically + * added to this list. + * @param priority + * The priority at which to request the containers. Higher + * priorities have lower numerical values. + * @param relaxLocality + * If true, containers for this request may be assigned on hosts + * and racks other than the ones explicitly requested. + */ + public ContainerRequest(ProfileCapability capability, String[] nodes, + String[] racks, Priority priority, boolean relaxLocality) { + this(capability, nodes, racks, priority, 0, relaxLocality, null); + } + /** * Instantiates a {@link ContainerRequest} with the given constraints. * @@ -277,10 +322,18 @@ public ContainerRequest(Resource capability, String[] nodes, String[] racks, relaxLocality, nodeLabelsExpression, ExecutionTypeRequest.newInstance()); } - + + public ContainerRequest(ProfileCapability capability, String[] nodes, + String[] racks, Priority priority, long allocationRequestId, + boolean relaxLocality, String nodeLabelsExpression) { + this(capability, nodes, racks, priority, allocationRequestId, + relaxLocality, nodeLabelsExpression, + ExecutionTypeRequest.newInstance()); + } + /** * Instantiates a {@link ContainerRequest} with the given constraints. - * + * * @param capability * The {@link Resource} to be requested for each container. * @param nodes @@ -309,6 +362,53 @@ public ContainerRequest(Resource capability, String[] nodes, String[] racks, Priority priority, long allocationRequestId, boolean relaxLocality, String nodeLabelsExpression, ExecutionTypeRequest executionTypeRequest) { + this(capability, nodes, racks, priority, allocationRequestId, + relaxLocality, nodeLabelsExpression, executionTypeRequest, + ProfileCapability.DEFAULT_PROFILE); + } + + public ContainerRequest(ProfileCapability capability, String[] nodes, + String[] racks, Priority priority, long allocationRequestId, + boolean relaxLocality, String nodeLabelsExpression, + ExecutionTypeRequest executionTypeRequest) { + this(capability.getProfileCapabilityOverride(), nodes, racks, priority, + allocationRequestId, relaxLocality, nodeLabelsExpression, + executionTypeRequest, capability.getProfileName()); + } + + /** + * Instantiates a {@link ContainerRequest} with the given constraints. + * + * @param capability + * The {@link Resource} to be requested for each container. + * @param nodes + * Any hosts to request that the containers are placed on. + * @param racks + * Any racks to request that the containers are placed on. The + * racks corresponding to any hosts requested will be automatically + * added to this list. 
+ * @param priority + * The priority at which to request the containers. Higher + * priorities have lower numerical values. + * @param allocationRequestId + * The allocationRequestId of the request. To be used as a tracking + * id to match Containers allocated against this request. Will + * default to 0 if not specified. + * @param relaxLocality + * If true, containers for this request may be assigned on hosts + * and racks other than the ones explicitly requested. + * @param nodeLabelsExpression + * Set node labels to allocate resource, now we only support + * asking for only a single node label + * @param executionTypeRequest + * Set the execution type of the container request. + * @param profile + * Set the resource profile for the container request + */ + public ContainerRequest(Resource capability, String[] nodes, String[] racks, + Priority priority, long allocationRequestId, boolean relaxLocality, + String nodeLabelsExpression, + ExecutionTypeRequest executionTypeRequest, String profile) { this.allocationRequestId = allocationRequestId; this.capability = capability; this.nodes = (nodes != null ? ImmutableList.copyOf(nodes) : null); @@ -317,6 +417,7 @@ public ContainerRequest(Resource capability, String[] nodes, String[] racks, this.relaxLocality = relaxLocality; this.nodeLabelsExpression = nodeLabelsExpression; this.executionTypeRequest = executionTypeRequest; + this.resourceProfile = profile; sanityCheck(); } @@ -368,6 +469,10 @@ public ExecutionTypeRequest getExecutionTypeRequest() { return executionTypeRequest; } + public String getResourceProfile() { + return resourceProfile; + } + public String toString() { StringBuilder sb = new StringBuilder(); sb.append("Capability[").append(capability).append("]"); @@ -375,6 +480,7 @@ public String toString() { sb.append("AllocationRequestId[").append(allocationRequestId).append("]"); sb.append("ExecutionTypeRequest[").append(executionTypeRequest) .append("]"); + sb.append("Resource Profile[").append(resourceProfile).append("]"); return sb.toString(); } @@ -434,6 +540,11 @@ public ContainerRequestBuilder executionTypeRequest( return this; } + public ContainerRequestBuilder resourceProfile(String resourceProfile) { + containerRequest.resourceProfile = resourceProfile; + return this; + } + public ContainerRequest build() { containerRequest.sanityCheck(); return containerRequest; @@ -627,6 +738,15 @@ public List> getMatchingRequests( " AMRMClient is expected to implement this !!"); } + + @InterfaceStability.Evolving + public List> getMatchingRequests( + Priority priority, String resourceName, ExecutionType executionType, + ProfileCapability capability) { + throw new UnsupportedOperationException("The sub-class extending" + + " AMRMClient is expected to implement this !!"); + } + /** * Get outstanding ContainerRequests matching the given * allocationRequestId. These ContainerRequests should have been added via @@ -702,8 +822,8 @@ public TimelineV2Client getRegisteredTimelineV2Client() { /** * Wait for check to return true for each 1000 ms. 
- * See also {@link #waitFor(com.google.common.base.Supplier, int)} - * and {@link #waitFor(com.google.common.base.Supplier, int, int)} + * See also {@link #waitFor(java.util.function.Supplier, int)} + * and {@link #waitFor(java.util.function.Supplier, int, int)} * @param check the condition for which it should wait */ public void waitFor(Supplier check) throws InterruptedException { @@ -713,7 +833,7 @@ public void waitFor(Supplier check) throws InterruptedException { /** * Wait for check to return true for each * checkEveryMillis ms. - * See also {@link #waitFor(com.google.common.base.Supplier, int, int)} + * See also {@link #waitFor(java.util.function.Supplier, int, int)} * @param check user defined checker * @param checkEveryMillis interval to call check */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java index 8c68a31ffe2..60e7813e422 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java @@ -61,6 +61,8 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.ReservationDefinition; import org.apache.hadoop.yarn.api.records.ReservationId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.SignalContainerCommand; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.YarnApplicationState; @@ -70,6 +72,7 @@ import org.apache.hadoop.yarn.exceptions.ApplicationIdNotProvidedException; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.ContainerNotFoundException; +import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; @@ -855,4 +858,46 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( throw new UnsupportedOperationException("The sub-class extending " + YarnClient.class.getName() + " is expected to implement this !"); } + + /** + *
<p>
+ * Get the resource profiles available in the RM. + *
</p>
+ * @return a Map of the resource profile names to their capabilities + * @throws YARNFeatureNotEnabledException if resource-profile is disabled + * @throws YarnException if any error happens inside YARN + * @throws IOException in case of other errors + */ + @Public + @Unstable + public abstract Map getResourceProfiles() + throws YarnException, IOException; + + /** + *
<p>
+ * Get the details of a specific resource profile from the RM. + *
</p>
+ * @param profile the profile name + * @return resource profile name with its capabilities + * @throws YARNFeatureNotEnabledException if resource-profile is disabled + * @throws YarnException if any error happens inside YARN + * @throws IOException in case of other errors + */ + @Public + @Unstable + public abstract Resource getResourceProfile(String profile) + throws YarnException, IOException; + + /** + *
<p>
+ * Get available resource types supported by RM. + *
</p>
+ * @return list of supported resource types with detailed information + * @throws YarnException if any issue happens inside YARN + * @throws IOException in case of other errors + */ + @Public + @Unstable + public abstract List getResourceTypeInfo() + throws YarnException, IOException; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java index 1ecfe1f588c..44a36af1363 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -52,7 +53,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import com.google.common.base.Supplier; /** * AMRMClientAsync handles communication with the ResourceManager @@ -375,8 +375,8 @@ public abstract void updateBlacklist(List blacklistAdditions, /** * Wait for check to return true for each 1000 ms. - * See also {@link #waitFor(com.google.common.base.Supplier, int)} - * and {@link #waitFor(com.google.common.base.Supplier, int, int)} + * See also {@link #waitFor(java.util.function.Supplier, int)} + * and {@link #waitFor(java.util.function.Supplier, int, int)} * @param check the condition for which it should wait */ public void waitFor(Supplier check) throws InterruptedException { @@ -386,7 +386,7 @@ public void waitFor(Supplier check) throws InterruptedException { /** * Wait for check to return true for each * checkEveryMillis ms.
- * See also {@link #waitFor(com.google.common.base.Supplier, int, int)} + * See also {@link #waitFor(java.util.function.Supplier, int, int)} * @param check user defined checker * @param checkEveryMillis interval to call check */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java index 7a21bc61ab0..a41ab6ad714 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn.client.api.impl; import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -59,6 +58,7 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -105,56 +105,56 @@ public class AMRMClientImpl extends AMRMClient { protected final Set blacklistedNodes = new HashSet(); protected final Set blacklistAdditions = new HashSet(); protected final Set blacklistRemovals = new HashSet(); + + protected Map resourceProfilesMap; static class ResourceRequestInfo { ResourceRequest remoteRequest; LinkedHashSet containerRequests; - + ResourceRequestInfo(Long allocationRequestId, Priority priority, - String resourceName, Resource capability, boolean relaxLocality) { + String resourceName, Resource capability, boolean relaxLocality, + String resourceProfile) { + ProfileCapability profileCapability = ProfileCapability + .newInstance(resourceProfile, capability); remoteRequest = ResourceRequest.newBuilder().priority(priority) .resourceName(resourceName).capability(capability).numContainers(0) - .allocationRequestId(allocationRequestId) - .relaxLocality(relaxLocality).build(); + .allocationRequestId(allocationRequestId).relaxLocality(relaxLocality) + .profileCapability(profileCapability).build(); containerRequests = new LinkedHashSet(); } } /** - * Class compares Resource by memory then cpu in reverse order + * Class compares Resource by memory, then cpu and then the remaining resource + * types in reverse order. 
*/ - static class ResourceReverseMemoryThenCpuComparator implements - Comparator, Serializable { - static final long serialVersionUID = 12345L; - @Override - public int compare(Resource arg0, Resource arg1) { - long mem0 = arg0.getMemorySize(); - long mem1 = arg1.getMemorySize(); - long cpu0 = arg0.getVirtualCores(); - long cpu1 = arg1.getVirtualCores(); - if(mem0 == mem1) { - if(cpu0 == cpu1) { - return 0; - } - if(cpu0 < cpu1) { - return 1; - } - return -1; - } - if(mem0 < mem1) { - return 1; - } - return -1; - } + static class ProfileCapabilityComparator + implements Comparator { + + HashMap resourceProfilesMap; + + public ProfileCapabilityComparator( + HashMap resourceProfileMap) { + this.resourceProfilesMap = resourceProfileMap; + } + + public int compare(T arg0, T arg1) { + Resource resource0 = + ProfileCapability.toResource(arg0, resourceProfilesMap); + Resource resource1 = + ProfileCapability.toResource(arg1, resourceProfilesMap); + return resource1.compareTo(resource0); + } } - static boolean canFit(Resource arg0, Resource arg1) { - long mem0 = arg0.getMemorySize(); - long mem1 = arg1.getMemorySize(); - long cpu0 = arg0.getVirtualCores(); - long cpu1 = arg1.getVirtualCores(); - - return (mem0 <= mem1 && cpu0 <= cpu1); + boolean canFit(ProfileCapability arg0, ProfileCapability arg1) { + Resource resource0 = + ProfileCapability.toResource(arg0, resourceProfilesMap); + Resource resource1 = + ProfileCapability.toResource(arg1, resourceProfilesMap); + return Resources.fitsIn(resource0, resource1); + } private final Map> remoteRequests = @@ -233,6 +233,7 @@ public RegisterApplicationMasterResponse registerApplicationMaster( return registerApplicationMaster(); } + @SuppressWarnings("unchecked") private RegisterApplicationMasterResponse registerApplicationMaster() throws YarnException, IOException { RegisterApplicationMasterRequest request = @@ -245,6 +246,7 @@ private RegisterApplicationMasterResponse registerApplicationMaster() if (!response.getNMTokensFromPreviousAttempts().isEmpty()) { populateNMTokens(response.getNMTokensFromPreviousAttempts()); } + this.resourceProfilesMap = response.getResourceProfiles(); } return response; } @@ -416,13 +418,15 @@ private List cloneAsks() { for(ResourceRequest r : ask) { // create a copy of ResourceRequest as we might change it while the // RPC layer is using it to send info across - ResourceRequest rr = ResourceRequest.newBuilder() - .priority(r.getPriority()).resourceName(r.getResourceName()) - .capability(r.getCapability()).numContainers(r.getNumContainers()) - .relaxLocality(r.getRelaxLocality()) - .nodeLabelExpression(r.getNodeLabelExpression()) - .executionTypeRequest(r.getExecutionTypeRequest()) - .allocationRequestId(r.getAllocationRequestId()).build(); + ResourceRequest rr = + ResourceRequest.newBuilder().priority(r.getPriority()) + .resourceName(r.getResourceName()).capability(r.getCapability()) + .numContainers(r.getNumContainers()) + .relaxLocality(r.getRelaxLocality()) + .nodeLabelExpression(r.getNodeLabelExpression()) + .executionTypeRequest(r.getExecutionTypeRequest()) + .allocationRequestId(r.getAllocationRequestId()) + .profileCapability(r.getProfileCapability()).build(); askList.add(rr); } return askList; @@ -504,6 +508,8 @@ public void unregisterApplicationMaster(FinalApplicationStatus appStatus, public synchronized void addContainerRequest(T req) { Preconditions.checkArgument(req != null, "Resource request can not be null."); + ProfileCapability profileCapability = ProfileCapability + .newInstance(req.getResourceProfile(), 
req.getCapability()); Set dedupedRacks = new HashSet(); if (req.getRacks() != null) { dedupedRacks.addAll(req.getRacks()); @@ -516,6 +522,8 @@ public synchronized void addContainerRequest(T req) { Set inferredRacks = resolveRacks(req.getNodes()); inferredRacks.removeAll(dedupedRacks); + checkResourceProfile(req.getResourceProfile()); + // check that specific and non-specific requests cannot be mixed within a // priority checkLocalityRelaxationConflict(req.getAllocationRequestId(), @@ -540,26 +548,26 @@ public synchronized void addContainerRequest(T req) { } for (String node : dedupedNodes) { addResourceRequest(req.getPriority(), node, - req.getExecutionTypeRequest(), req.getCapability(), req, true, + req.getExecutionTypeRequest(), profileCapability, req, true, req.getNodeLabelExpression()); } } for (String rack : dedupedRacks) { addResourceRequest(req.getPriority(), rack, req.getExecutionTypeRequest(), - req.getCapability(), req, true, req.getNodeLabelExpression()); + profileCapability, req, true, req.getNodeLabelExpression()); } // Ensure node requests are accompanied by requests for // corresponding rack for (String rack : inferredRacks) { addResourceRequest(req.getPriority(), rack, req.getExecutionTypeRequest(), - req.getCapability(), req, req.getRelaxLocality(), + profileCapability, req, req.getRelaxLocality(), req.getNodeLabelExpression()); } // Off-switch addResourceRequest(req.getPriority(), ResourceRequest.ANY, - req.getExecutionTypeRequest(), req.getCapability(), req, + req.getExecutionTypeRequest(), profileCapability, req, req.getRelaxLocality(), req.getNodeLabelExpression()); } @@ -567,6 +575,8 @@ public synchronized void addContainerRequest(T req) { public synchronized void removeContainerRequest(T req) { Preconditions.checkArgument(req != null, "Resource request can not be null."); + ProfileCapability profileCapability = ProfileCapability + .newInstance(req.getResourceProfile(), req.getCapability()); Set allRacks = new HashSet(); if (req.getRacks() != null) { allRacks.addAll(req.getRacks()); @@ -577,17 +587,17 @@ public synchronized void removeContainerRequest(T req) { if (req.getNodes() != null) { for (String node : new HashSet(req.getNodes())) { decResourceRequest(req.getPriority(), node, - req.getExecutionTypeRequest(), req.getCapability(), req); + req.getExecutionTypeRequest(), profileCapability, req); } } for (String rack : allRacks) { decResourceRequest(req.getPriority(), rack, - req.getExecutionTypeRequest(), req.getCapability(), req); + req.getExecutionTypeRequest(), profileCapability, req); } decResourceRequest(req.getPriority(), ResourceRequest.ANY, - req.getExecutionTypeRequest(), req.getCapability(), req); + req.getExecutionTypeRequest(), profileCapability, req); } @Override @@ -686,6 +696,17 @@ public synchronized List> getMatchingRequests( public synchronized List> getMatchingRequests( Priority priority, String resourceName, ExecutionType executionType, Resource capability) { + ProfileCapability profileCapability = + ProfileCapability.newInstance(capability); + return getMatchingRequests(priority, resourceName, executionType, + profileCapability); + } + + @Override + @SuppressWarnings("unchecked") + public synchronized List> getMatchingRequests( + Priority priority, String resourceName, ExecutionType executionType, + ProfileCapability capability) { Preconditions.checkArgument(capability != null, "The Resource to be requested should not be null "); Preconditions.checkArgument(priority != null, @@ -695,22 +716,22 @@ public synchronized List> getMatchingRequests( 
RemoteRequestsTable remoteRequestsTable = getTable(0); if (null != remoteRequestsTable) { - List> matchingRequests = - remoteRequestsTable.getMatchingRequests(priority, resourceName, - executionType, capability); + List> matchingRequests = remoteRequestsTable + .getMatchingRequests(priority, resourceName, executionType, + capability); if (null != matchingRequests) { // If no exact match. Container may be larger than what was requested. // get all resources <= capability. map is reverse sorted. for (ResourceRequestInfo resReqInfo : matchingRequests) { - if (canFit(resReqInfo.remoteRequest.getCapability(), capability) && - !resReqInfo.containerRequests.isEmpty()) { + if (canFit(resReqInfo.remoteRequest.getProfileCapability(), + capability) && !resReqInfo.containerRequests.isEmpty()) { list.add(resReqInfo.containerRequests); } } } } // no match found - return list; + return list; } private Set resolveRacks(List nodes) { @@ -758,6 +779,15 @@ private void checkLocalityRelaxationConflict(Long allocationReqId, } } } + + private void checkResourceProfile(String profile) { + if (resourceProfilesMap != null && !resourceProfilesMap.isEmpty() + && !resourceProfilesMap.containsKey(profile)) { + throw new InvalidContainerRequestException( + "Invalid profile name, valid profile names are " + resourceProfilesMap + .keySet()); + } + } /** * Valid if a node label expression specified on container request is valid or @@ -845,12 +875,16 @@ private void addResourceRequestToAsk(ResourceRequest remoteRequest) { } private void addResourceRequest(Priority priority, String resourceName, - ExecutionTypeRequest execTypeReq, Resource capability, T req, + ExecutionTypeRequest execTypeReq, ProfileCapability capability, T req, boolean relaxLocality, String labelExpression) { RemoteRequestsTable remoteRequestsTable = getTable(req.getAllocationRequestId()); if (remoteRequestsTable == null) { remoteRequestsTable = new RemoteRequestsTable(); + if (this.resourceProfilesMap instanceof HashMap) { + remoteRequestsTable.setResourceComparator( + new ProfileCapabilityComparator((HashMap) resourceProfilesMap)); + } putTable(req.getAllocationRequestId(), remoteRequestsTable); } @SuppressWarnings("unchecked") @@ -863,6 +897,7 @@ private void addResourceRequest(Priority priority, String resourceName, addResourceRequestToAsk(resourceRequestInfo.remoteRequest); if (LOG.isDebugEnabled()) { + LOG.debug("Adding request to ask " + resourceRequestInfo.remoteRequest); LOG.debug("addResourceRequest:" + " applicationId=" + " priority=" + priority.getPriority() + " resourceName=" + resourceName + " numContainers=" @@ -872,7 +907,7 @@ private void addResourceRequest(Priority priority, String resourceName, } private void decResourceRequest(Priority priority, String resourceName, - ExecutionTypeRequest execTypeReq, Resource capability, T req) { + ExecutionTypeRequest execTypeReq, ProfileCapability capability, T req) { RemoteRequestsTable remoteRequestsTable = getTable(req.getAllocationRequestId()); if (remoteRequestsTable != null) { @@ -882,7 +917,7 @@ private void decResourceRequest(Priority priority, String resourceName, execTypeReq, capability, req); // send the ResourceRequest to RM even if is 0 because it needs to // override a previously sent value. 
If ResourceRequest was not sent - // previously then sending 0 ought to be a no-op on RM + // previously then sending 0 aught to be a no-op on RM if (resourceRequestInfo != null) { addResourceRequestToAsk(resourceRequestInfo.remoteRequest); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java index 110ca799436..135e1db2939 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/RemoteRequestsTable.java @@ -23,7 +23,7 @@ import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.Priority; -import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import java.util.Collection; import java.util.HashMap; @@ -35,43 +35,42 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.ResourceRequestInfo; -import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.ResourceReverseMemoryThenCpuComparator; +import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.ProfileCapabilityComparator; class RemoteRequestsTable implements Iterable{ private static final Log LOG = LogFactory.getLog(RemoteRequestsTable.class); - static ResourceReverseMemoryThenCpuComparator resourceComparator = - new ResourceReverseMemoryThenCpuComparator(); + private ProfileCapabilityComparator resourceComparator; /** * Nested Iterator that iterates over just the ResourceRequestInfo * object. 
*/ class RequestInfoIterator implements Iterator { - private Iterator>>> iLocMap; - private Iterator>> iExecTypeMap; - private Iterator> iCapMap; + private Iterator> iCapMap; private Iterator iResReqInfo; public RequestInfoIterator(Iterator>>> + Map>>> iLocationMap) { this.iLocMap = iLocationMap; if (iLocMap.hasNext()) { iExecTypeMap = iLocMap.next().values().iterator(); } else { iExecTypeMap = - new LinkedList>>().iterator(); } if (iExecTypeMap.hasNext()) { iCapMap = iExecTypeMap.next().values().iterator(); } else { iCapMap = - new LinkedList>() + new LinkedList>() .iterator(); } if (iCapMap.hasNext()) { @@ -113,7 +112,7 @@ public void remove() { // Nest map with Primary key : // Priority -> ResourceName(String) -> ExecutionType -> Capability(Resource) // and value : ResourceRequestInfo - private Map>>> remoteRequestsTable = new HashMap<>(); @Override @@ -122,8 +121,8 @@ public Iterator iterator() { } ResourceRequestInfo get(Priority priority, String location, - ExecutionType execType, Resource capability) { - TreeMap capabilityMap = + ExecutionType execType, ProfileCapability capability) { + TreeMap capabilityMap = getCapabilityMap(priority, location, execType); if (capabilityMap == null) { return null; @@ -131,9 +130,10 @@ ResourceRequestInfo get(Priority priority, String location, return capabilityMap.get(capability); } + @SuppressWarnings("unchecked") void put(Priority priority, String resourceName, ExecutionType execType, - Resource capability, ResourceRequestInfo resReqInfo) { - Map>> locationMap = remoteRequestsTable.get(priority); if (locationMap == null) { @@ -143,8 +143,8 @@ void put(Priority priority, String resourceName, ExecutionType execType, LOG.debug("Added priority=" + priority); } } - Map> execTypeMap = - locationMap.get(resourceName); + Map> + execTypeMap = locationMap.get(resourceName); if (execTypeMap == null) { execTypeMap = new HashMap<>(); locationMap.put(resourceName, execTypeMap); @@ -152,9 +152,14 @@ void put(Priority priority, String resourceName, ExecutionType execType, LOG.debug("Added resourceName=" + resourceName); } } - TreeMap capabilityMap = + TreeMap capabilityMap = execTypeMap.get(execType); if (capabilityMap == null) { + // this can happen if the user doesn't register with the RM before + // calling addResourceRequest + if (resourceComparator == null) { + resourceComparator = new ProfileCapabilityComparator(new HashMap<>()); + } capabilityMap = new TreeMap<>(resourceComparator); execTypeMap.put(execType, capabilityMap); if (LOG.isDebugEnabled()) { @@ -165,9 +170,9 @@ void put(Priority priority, String resourceName, ExecutionType execType, } ResourceRequestInfo remove(Priority priority, String resourceName, - ExecutionType execType, Resource capability) { + ExecutionType execType, ProfileCapability capability) { ResourceRequestInfo retVal = null; - Map>> locationMap = remoteRequestsTable.get(priority); if (locationMap == null) { if (LOG.isDebugEnabled()) { @@ -175,7 +180,7 @@ ResourceRequestInfo remove(Priority priority, String resourceName, } return null; } - Map> + Map> execTypeMap = locationMap.get(resourceName); if (execTypeMap == null) { if (LOG.isDebugEnabled()) { @@ -183,7 +188,7 @@ ResourceRequestInfo remove(Priority priority, String resourceName, } return null; } - TreeMap capabilityMap = + TreeMap capabilityMap = execTypeMap.get(execType); if (capabilityMap == null) { if (LOG.isDebugEnabled()) { @@ -204,14 +209,14 @@ ResourceRequestInfo remove(Priority priority, String resourceName, return retVal; } - Map>> getLocationMap(Priority priority) { 
return remoteRequestsTable.get(priority); } - Map> + Map> getExecutionTypeMap(Priority priority, String location) { - Map>> locationMap = getLocationMap(priority); if (locationMap == null) { return null; @@ -219,10 +224,10 @@ ResourceRequestInfo>>> getLocationMap(Priority priority) { return locationMap.get(location); } - TreeMap getCapabilityMap(Priority + TreeMap getCapabilityMap(Priority priority, String location, ExecutionType execType) { - Map> + Map> executionTypeMap = getExecutionTypeMap(priority, location); if (executionTypeMap == null) { return null; @@ -236,7 +241,7 @@ List getAllResourceRequestInfos(Priority priority, List retList = new LinkedList<>(); for (String location : locations) { for (ExecutionType eType : ExecutionType.values()) { - TreeMap capabilityMap = + TreeMap capabilityMap = getCapabilityMap(priority, location, eType); if (capabilityMap != null) { retList.addAll(capabilityMap.values()); @@ -248,9 +253,9 @@ List getAllResourceRequestInfos(Priority priority, List getMatchingRequests( Priority priority, String resourceName, ExecutionType executionType, - Resource capability) { + ProfileCapability capability) { List list = new LinkedList<>(); - TreeMap capabilityMap = + TreeMap capabilityMap = getCapabilityMap(priority, resourceName, executionType); if (capabilityMap != null) { ResourceRequestInfo resourceRequestInfo = capabilityMap.get(capability); @@ -266,14 +271,15 @@ List getMatchingRequests( @SuppressWarnings("unchecked") ResourceRequestInfo addResourceRequest(Long allocationRequestId, Priority priority, String resourceName, ExecutionTypeRequest execTypeReq, - Resource capability, T req, boolean relaxLocality, + ProfileCapability capability, T req, boolean relaxLocality, String labelExpression) { - ResourceRequestInfo resourceRequestInfo = get(priority, resourceName, - execTypeReq.getExecutionType(), capability); + ResourceRequestInfo resourceRequestInfo = + get(priority, resourceName, execTypeReq.getExecutionType(), capability); if (resourceRequestInfo == null) { resourceRequestInfo = new ResourceRequestInfo(allocationRequestId, priority, resourceName, - capability, relaxLocality); + capability.getProfileCapabilityOverride(), relaxLocality, + capability.getProfileName()); put(priority, resourceName, execTypeReq.getExecutionType(), capability, resourceRequestInfo); } @@ -288,11 +294,14 @@ ResourceRequestInfo addResourceRequest(Long allocationRequestId, if (ResourceRequest.ANY.equals(resourceName)) { resourceRequestInfo.remoteRequest.setNodeLabelExpression(labelExpression); } + if (LOG.isDebugEnabled()) { + LOG.debug("Adding request to ask " + resourceRequestInfo.remoteRequest); + } return resourceRequestInfo; } ResourceRequestInfo decResourceRequest(Priority priority, String resourceName, - ExecutionTypeRequest execTypeReq, Resource capability, T req) { + ExecutionTypeRequest execTypeReq, ProfileCapability capability, T req) { ResourceRequestInfo resourceRequestInfo = get(priority, resourceName, execTypeReq.getExecutionType(), capability); @@ -330,4 +339,34 @@ boolean isEmpty() { return remoteRequestsTable.isEmpty(); } + @SuppressWarnings("unchecked") + public void setResourceComparator(ProfileCapabilityComparator comparator) { + ProfileCapabilityComparator old = this.resourceComparator; + this.resourceComparator = comparator; + if (old != null) { + // we've already set a resource comparator - re-create the maps with the + // new one. this is needed in case someone adds container requests before + // registering with the RM. 
In such a case, the comparator won't have + // the resource profiles map. After registration, the map is available + // so re-create the capabilities maps + + for (Map.Entry>>> + priEntry : remoteRequestsTable.entrySet()) { + for (Map.Entry>> nameEntry : priEntry.getValue().entrySet()) { + for (Map.Entry> execEntry : nameEntry + .getValue().entrySet()) { + Map capabilityMap = + execEntry.getValue(); + TreeMap newCapabiltyMap = + new TreeMap<>(resourceComparator); + newCapabiltyMap.putAll(capabilityMap); + execEntry.setValue(newCapabiltyMap); + } + } + } + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index 19cb10b1d11..3c30023f5b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -43,6 +43,8 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -70,6 +72,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; @@ -101,6 +104,8 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.SignalContainerCommand; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.YarnApplicationState; @@ -937,4 +942,28 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( throws YarnException, IOException { return rmClient.updateApplicationTimeouts(request); } + + @Override + public Map getResourceProfiles() + throws YarnException, IOException { + GetAllResourceProfilesRequest request = + GetAllResourceProfilesRequest.newInstance(); + return rmClient.getResourceProfiles(request).getResourceProfiles(); + } + + @Override + public Resource getResourceProfile(String profile) + throws YarnException, IOException { + GetResourceProfileRequest request = GetResourceProfileRequest + .newInstance(profile); + return rmClient.getResourceProfile(request).getResource(); + } + + @Override + public List getResourceTypeInfo() + throws 
YarnException, IOException { + GetAllResourceTypeInfoRequest request = + GetAllResourceTypeInfoRequest.newInstance(); + return rmClient.getResourceTypeInfo(request).getResourceTypeInfo(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java index 893348ac246..2a9b3bcd925 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java @@ -41,6 +41,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -61,6 +62,8 @@ import com.google.common.annotations.VisibleForTesting; +import static org.apache.hadoop.yarn.util.StringHelper.getResourceSecondsString; + @Private @Unstable public class ApplicationCLI extends YarnCLI { @@ -359,10 +362,21 @@ private void updateApplicationTimeout(String applicationId, + timeoutType.toString() + " of an application " + applicationId); UpdateApplicationTimeoutsRequest request = UpdateApplicationTimeoutsRequest .newInstance(appId, Collections.singletonMap(timeoutType, newTimeout)); - client.updateApplicationTimeouts(request); + UpdateApplicationTimeoutsResponse updateApplicationTimeouts = + client.updateApplicationTimeouts(request); + String updatedTimeout = + updateApplicationTimeouts.getApplicationTimeouts().get(timeoutType); + + if (timeoutType.equals(ApplicationTimeoutType.LIFETIME) + && !newTimeout.equals(updatedTimeout)) { + sysout.println("Updated lifetime of an application " + applicationId + + " to queue max/default lifetime." + " New expiry time is " + + updatedTimeout); + return; + } sysout.println( "Successfully updated " + timeoutType.toString() + " of an application " - + applicationId + ". New expiry time is " + newTimeout); + + applicationId + ". 
New expiry time is " + updatedTimeout); } /** @@ -699,24 +713,9 @@ private int printApplicationReport(String applicationId) appReportStr.println(appReport.getRpcPort()); appReportStr.print("\tAM Host : "); appReportStr.println(appReport.getHost()); - appReportStr.print("\tAggregate Resource Allocation : "); - ApplicationResourceUsageReport usageReport = appReport.getApplicationResourceUsageReport(); - if (usageReport != null) { - //completed app report in the timeline server doesn't have usage report - appReportStr.print(usageReport.getMemorySeconds() + " MB-seconds, "); - appReportStr.println(usageReport.getVcoreSeconds() + " vcore-seconds"); - appReportStr.print("\tAggregate Resource Preempted : "); - appReportStr.print(usageReport.getPreemptedMemorySeconds() + - " MB-seconds, "); - appReportStr.println(usageReport.getPreemptedVcoreSeconds() + - " vcore-seconds"); - } else { - appReportStr.println("N/A"); - appReportStr.print("\tAggregate Resource Preempted : "); - appReportStr.println("N/A"); - } + printResourceUsage(appReportStr, usageReport); appReportStr.print("\tLog Aggregation Status : "); appReportStr.println(appReport.getLogAggregationStatus() == null ? "N/A" : appReport.getLogAggregationStatus()); @@ -747,6 +746,22 @@ private int printApplicationReport(String applicationId) return 0; } + private void printResourceUsage(PrintWriter appReportStr, + ApplicationResourceUsageReport usageReport) { + appReportStr.print("\tAggregate Resource Allocation : "); + if (usageReport != null) { + appReportStr.println( + getResourceSecondsString(usageReport.getResourceSecondsMap())); + appReportStr.print("\tAggregate Resource Preempted : "); + appReportStr.println(getResourceSecondsString( + usageReport.getPreemptedResourceSecondsMap())); + } else { + appReportStr.println("N/A"); + appReportStr.print("\tAggregate Resource Preempted : "); + appReportStr.println("N/A"); + } + } + private String getAllValidApplicationStates() { StringBuilder sb = new StringBuilder(); sb.append("The valid application state can be" + " one of the following: "); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/LogsCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/LogsCLI.java index 5528412aff7..9a8ba4a6740 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/LogsCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/LogsCLI.java @@ -22,6 +22,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; +import java.net.ConnectException; +import java.net.SocketException; +import java.net.SocketTimeoutException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -62,9 +65,10 @@ import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo; import org.apache.hadoop.yarn.logaggregation.ContainerLogsRequest; import org.apache.hadoop.yarn.logaggregation.LogCLIHelpers; -import org.apache.hadoop.yarn.logaggregation.PerContainerLogFileInfo; +import org.apache.hadoop.yarn.logaggregation.LogToolUtils; import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import org.apache.hadoop.yarn.webapp.util.YarnWebServiceUtils; import org.codehaus.jettison.json.JSONArray; @@ 
-74,9 +78,11 @@ import com.google.common.annotations.VisibleForTesting; import com.sun.jersey.api.client.Client; import com.sun.jersey.api.client.ClientHandlerException; +import com.sun.jersey.api.client.ClientRequest; import com.sun.jersey.api.client.ClientResponse; import com.sun.jersey.api.client.UniformInterfaceException; import com.sun.jersey.api.client.WebResource; +import com.sun.jersey.api.client.filter.ClientFilter; @Public @Evolving @@ -97,14 +103,27 @@ public class LogsCLI extends Configured implements Tool { = "show_container_log_info"; private static final String OUT_OPTION = "out"; private static final String SIZE_OPTION = "size"; + private static final String CLIENT_MAX_RETRY_OPTION = "client_max_retries"; + private static final String CLIENT_RETRY_INTERVAL_OPTION + = "client_retry_interval_ms"; public static final String HELP_CMD = "help"; + private PrintStream outStream = System.out; private YarnClient yarnClient = null; + private Client webServiceClient = null; + + private static final int DEFAULT_MAX_RETRIES = 30; + private static final long DEFAULT_RETRY_INTERVAL = 1000; + + @Private + @VisibleForTesting + ClientConnectionRetry connectionRetry; @Override public int run(String[] args) throws Exception { try { yarnClient = createYarnClient(); + webServiceClient = Client.create(); return runCommand(args); } finally { if (yarnClient != null) { @@ -139,6 +158,8 @@ private int runCommand(String[] args) throws Exception { List amContainersList = new ArrayList(); String localDir = null; long bytes = Long.MAX_VALUE; + int maxRetries = DEFAULT_MAX_RETRIES; + long retryInterval = DEFAULT_RETRY_INTERVAL; try { CommandLine commandLine = parser.parse(opts, args, false); appIdStr = commandLine.getOptionValue(APPLICATION_ID_OPTION); @@ -170,6 +191,14 @@ private int runCommand(String[] args) throws Exception { if (commandLine.hasOption(SIZE_OPTION)) { bytes = Long.parseLong(commandLine.getOptionValue(SIZE_OPTION)); } + if (commandLine.hasOption(CLIENT_MAX_RETRY_OPTION)) { + maxRetries = Integer.parseInt(commandLine.getOptionValue( + CLIENT_MAX_RETRY_OPTION)); + } + if (commandLine.hasOption(CLIENT_RETRY_INTERVAL_OPTION)) { + retryInterval = Long.parseLong(commandLine.getOptionValue( + CLIENT_RETRY_INTERVAL_OPTION)); + } } catch (ParseException e) { System.err.println("options parsing failed: " + e.getMessage()); printHelpMessage(printOpts); @@ -231,6 +260,11 @@ private int runCommand(String[] args) throws Exception { } } + // Set up Retry WebService Client + connectionRetry = new ClientConnectionRetry(maxRetries, retryInterval); + ClientJerseyRetryFilter retryFilter = new ClientJerseyRetryFilter(); + webServiceClient.addFilter(retryFilter); + LogCLIHelpers logCliHelper = new LogCLIHelpers(); logCliHelper.setConf(getConf()); @@ -341,7 +375,6 @@ private void printHelpMessage(Options options) { protected List getAMContainerInfoForRMWebService( Configuration conf, String appId) throws ClientHandlerException, UniformInterfaceException, JSONException { - Client webServiceClient = Client.create(); String webAppAddress = WebAppUtils.getRMWebAppURLWithScheme(conf); WebResource webResource = webServiceClient.resource(webAppAddress); @@ -363,7 +396,6 @@ protected List getAMContainerInfoForRMWebService( private List getAMContainerInfoForAHSWebService( Configuration conf, String appId) throws ClientHandlerException, UniformInterfaceException, JSONException { - Client webServiceClient = Client.create(); String webAppAddress = WebAppUtils.getHttpSchemePrefix(conf) + 
WebAppUtils.getAHSWebAppURLWithoutScheme(conf); @@ -411,12 +443,11 @@ private boolean fetchAllLogFiles(String[] logFiles, String[] logFilesRegex) { return false; } - private List> getContainerLogFiles( + private List> getContainerLogFiles( Configuration conf, String containerIdStr, String nodeHttpAddress) throws IOException { - List> logFileInfos + List> logFileInfos = new ArrayList<>(); - Client webServiceClient = Client.create(); try { WebResource webResource = webServiceClient .resource(WebAppUtils.getHttpSchemePrefix(conf) + nodeHttpAddress); @@ -453,12 +484,12 @@ private List> getContainerLogFiles( if (ob instanceof JSONArray) { JSONArray obArray = (JSONArray)ob; for (int j = 0; j < obArray.length(); j++) { - logFileInfos.add(new Pair( + logFileInfos.add(new Pair( generatePerContainerLogFileInfoFromJSON( obArray.getJSONObject(j)), aggregateType)); } } else if (ob instanceof JSONObject) { - logFileInfos.add(new Pair( + logFileInfos.add(new Pair( generatePerContainerLogFileInfoFromJSON( (JSONObject)ob), aggregateType)); } @@ -477,7 +508,7 @@ private List> getContainerLogFiles( return logFileInfos; } - private PerContainerLogFileInfo generatePerContainerLogFileInfoFromJSON( + private ContainerLogFileInfo generatePerContainerLogFileInfoFromJSON( JSONObject meta) throws JSONException { String fileName = meta.has("fileName") ? meta.getString("fileName") : "N/A"; @@ -485,11 +516,10 @@ private PerContainerLogFileInfo generatePerContainerLogFileInfoFromJSON( meta.getString("fileSize") : "N/A"; String lastModificationTime = meta.has("lastModifiedTime") ? meta.getString("lastModifiedTime") : "N/A"; - return new PerContainerLogFileInfo(fileName, fileSize, + return new ContainerLogFileInfo(fileName, fileSize, lastModificationTime); } - @Private @VisibleForTesting public int printContainerLogsFromRunningApplication(Configuration conf, @@ -506,7 +536,7 @@ public int printContainerLogsFromRunningApplication(Configuration conf, return -1; } String nodeId = request.getNodeId(); - PrintStream out = logCliHelper.createPrintStream(localDir, nodeId, + PrintStream out = LogToolUtils.createPrintStream(localDir, nodeId, containerIdStr); try { Set matchedFiles = getMatchedContainerLogFiles(request, @@ -520,7 +550,6 @@ public int printContainerLogsFromRunningApplication(Configuration conf, ContainerLogsRequest newOptions = new ContainerLogsRequest(request); newOptions.setLogTypes(matchedFiles); - Client webServiceClient = Client.create(); boolean foundAnyLogs = false; byte[] buffer = new byte[65536]; for (String logFile : newOptions.getLogTypes()) { @@ -796,6 +825,10 @@ private int showApplicationLogInfo(ContainerLogsRequest request, } } + /** + * Create Command Options. + * @return the command options + */ private Options createCommandOpts() { Options opts = new Options(); opts.addOption(HELP_CMD, false, "Displays help for all commands."); @@ -858,6 +891,13 @@ private Options createCommandOpts() { opts.addOption(SIZE_OPTION, true, "Prints the log file's first 'n' bytes " + "or the last 'n' bytes. Use negative values as bytes to read from " + "the end and positive values as bytes to read from the beginning."); + opts.addOption(CLIENT_MAX_RETRY_OPTION, true, "Set max retry number for a" + + " retry client to get the container logs for the running " + + "applications. Use a negative value to make retry forever. " + + "The default value is 30."); + opts.addOption(CLIENT_RETRY_INTERVAL_OPTION, true, + "Work with --client_max_retries to create a retry client. 
" + + "The default value is 1000."); opts.getOption(APPLICATION_ID_OPTION).setArgName("Application ID"); opts.getOption(CONTAINER_ID_OPTION).setArgName("Container ID"); opts.getOption(NODE_ADDRESS_OPTION).setArgName("Node Address"); @@ -865,9 +905,17 @@ private Options createCommandOpts() { opts.getOption(AM_CONTAINER_OPTION).setArgName("AM Containers"); opts.getOption(OUT_OPTION).setArgName("Local Directory"); opts.getOption(SIZE_OPTION).setArgName("size"); + opts.getOption(CLIENT_MAX_RETRY_OPTION).setArgName("Max Retries"); + opts.getOption(CLIENT_RETRY_INTERVAL_OPTION) + .setArgName("Retry Interval"); return opts; } + /** + * Create Print options for helper message. + * @param commandOpts the options + * @return the print options + */ private Options createPrintOpts(Options commandOpts) { Options printOpts = new Options(); printOpts.addOption(commandOpts.getOption(HELP_CMD)); @@ -883,6 +931,8 @@ private Options createPrintOpts(Options commandOpts) { printOpts.addOption(commandOpts.getOption(SIZE_OPTION)); printOpts.addOption(commandOpts.getOption( PER_CONTAINER_LOG_FILES_REGEX_OPTION)); + printOpts.addOption(commandOpts.getOption(CLIENT_MAX_RETRY_OPTION)); + printOpts.addOption(commandOpts.getOption(CLIENT_RETRY_INTERVAL_OPTION)); return printOpts; } @@ -1235,9 +1285,9 @@ private void outputContainerLogMeta(String containerId, String nodeId, outStream.printf(LogCLIHelpers.PER_LOG_FILE_INFO_PATTERN, "LogFile", "LogLength", "LastModificationTime", "LogAggregationType"); outStream.println(StringUtils.repeat("=", containerString.length() * 2)); - List> infos = getContainerLogFiles( + List> infos = getContainerLogFiles( getConf(), containerId, nodeHttpAddress); - for (Pair info : infos) { + for (Pair info : infos) { outStream.printf(LogCLIHelpers.PER_LOG_FILE_INFO_PATTERN, info.getKey().getFileName(), info.getKey().getFileSize(), info.getKey().getLastModifiedTime(), info.getValue()); @@ -1249,11 +1299,11 @@ public Set getMatchedContainerLogFiles(ContainerLogsRequest request, boolean useRegex) throws IOException { // fetch all the log files for the container // filter the log files based on the given -log_files pattern - List> allLogFileInfos= + List> allLogFileInfos= getContainerLogFiles(getConf(), request.getContainerId(), request.getNodeHttpAddress()); List fileNames = new ArrayList(); - for (Pair fileInfo : allLogFileInfos) { + for (Pair fileInfo : allLogFileInfos) { fileNames.add(fileInfo.getKey().getFileName()); } return getMatchedLogFiles(request, fileNames, @@ -1285,4 +1335,120 @@ public String getNodeHttpAddressFromRMWebString(ContainerLogsRequest request) return nodeInfo.has("nodeHTTPAddress") ? nodeInfo.getString("nodeHTTPAddress") : null; } + + // Class to handle retry + static class ClientConnectionRetry { + + // maxRetries < 0 means keep trying + @Private + @VisibleForTesting + public int maxRetries; + + @Private + @VisibleForTesting + public long retryInterval; + + // Indicates if retries happened last time. Only tests should read it. + // In unit tests, retryOn() calls should _not_ be concurrent. 
+ private boolean retried = false; + + @Private + @VisibleForTesting + boolean getRetired() { + return retried; + } + + // Constructor with default retry settings + public ClientConnectionRetry(int inputMaxRetries, + long inputRetryInterval) { + this.maxRetries = inputMaxRetries; + this.retryInterval = inputRetryInterval; + } + + public Object retryOn(ClientRetryOp op) + throws RuntimeException, IOException { + int leftRetries = maxRetries; + retried = false; + + // keep trying + while (true) { + try { + // try perform the op, if fail, keep retrying + return op.run(); + } catch (IOException | RuntimeException e) { + // break if there's no retries left + if (leftRetries == 0) { + break; + } + if (op.shouldRetryOn(e)) { + logException(e, leftRetries); + } else { + throw e; + } + } + if (leftRetries > 0) { + leftRetries--; + } + retried = true; + try { + // sleep for the given time interval + Thread.sleep(retryInterval); + } catch (InterruptedException ie) { + System.out.println("Client retry sleep interrupted! "); + } + } + throw new RuntimeException("Connection retries limit exceeded."); + }; + + private void logException(Exception e, int leftRetries) { + if (leftRetries > 0) { + System.out.println("Exception caught by ClientConnectionRetry," + + " will try " + leftRetries + " more time(s).\nMessage: " + + e.getMessage()); + } else { + // note that maxRetries may be -1 at the very beginning + System.out.println("ConnectionException caught by ClientConnectionRetry," + + " will keep retrying.\nMessage: " + + e.getMessage()); + } + } + } + + private class ClientJerseyRetryFilter extends ClientFilter { + @Override + public ClientResponse handle(final ClientRequest cr) + throws ClientHandlerException { + // Set up the retry operation + ClientRetryOp jerseyRetryOp = new ClientRetryOp() { + @Override + public Object run() { + // Try pass the request, if fail, keep retrying + return getNext().handle(cr); + } + + @Override + public boolean shouldRetryOn(Exception e) { + // Only retry on connection exceptions + return (e instanceof ClientHandlerException) + && (e.getCause() instanceof ConnectException || + e.getCause() instanceof SocketTimeoutException || + e.getCause() instanceof SocketException); + } + }; + try { + return (ClientResponse) connectionRetry.retryOn(jerseyRetryOp); + } catch (IOException e) { + throw new ClientHandlerException("Jersey retry failed!\nMessage: " + + e.getMessage()); + } + } + } + + // Abstract class for an operation that should be retried by client + private static abstract class ClientRetryOp { + // The operation that should be retried + public abstract Object run() throws IOException; + // The method to indicate if we should retry given the incoming exception + public abstract boolean shouldRetryOn(Exception e); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/async/impl/TestAMRMClientAsync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/async/impl/TestAMRMClientAsync.java index 56826c431c7..8d912a6040f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/async/impl/TestAMRMClientAsync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/async/impl/TestAMRMClientAsync.java @@ -34,6 +34,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import 
java.util.function.Supplier; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -62,7 +63,6 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -import com.google.common.base.Supplier; public class TestAMRMClientAsync { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java index 51806c9e1fc..aa75ce8a5b8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java @@ -33,6 +33,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.function.Supplier; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -88,7 +89,6 @@ import org.mockito.stubbing.Answer; import org.eclipse.jetty.util.log.Log; -import com.google.common.base.Supplier; /** * Test application master client class to resource manager. @@ -130,11 +130,13 @@ public static Collection data() { @Before public void setup() throws Exception { conf = new YarnConfiguration(); - createClusterAndStartApplication(); + createClusterAndStartApplication(conf); } - private void createClusterAndStartApplication() throws Exception { + private void createClusterAndStartApplication(Configuration conf) + throws Exception { // start minicluster + this.conf = conf; conf.set(YarnConfiguration.RM_SCHEDULER, schedulerName); conf.setLong( YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS, @@ -532,11 +534,12 @@ private void verifyMatches( List> matches, int matchSize) { assertEquals(1, matches.size()); - assertEquals(matches.get(0).size(), matchSize); + assertEquals(matchSize, matches.get(0).size()); } @Test (timeout=60000) - public void testAMRMClientMatchingFitInferredRack() throws YarnException, IOException { + public void testAMRMClientMatchingFitInferredRack() + throws YarnException, IOException { AMRMClientImpl amClient = null; try { // start am rm client @@ -570,8 +573,9 @@ public void testAMRMClientMatchingFitInferredRack() throws YarnException, IOExce matches = amClient.getMatchingRequests(priority, rack, capability); assertTrue(matches.isEmpty()); - amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, - null, null); + amClient + .unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, + null); } finally { if (amClient != null && amClient.getServiceState() == STATE.STARTED) { @@ -605,15 +609,18 @@ public void testAMRMClientMatchStorage() throws YarnException, IOException { amClient.addContainerRequest(storedContainer2); amClient.addContainerRequest(storedContainer3); + ProfileCapability profileCapability = + ProfileCapability.newInstance(capability); + // test addition and storage RemoteRequestsTable remoteRequestsTable = amClient.getTable(0); int containersRequestedAny = remoteRequestsTable.get(priority, - ResourceRequest.ANY, ExecutionType.GUARANTEED, capability) + ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability) .remoteRequest.getNumContainers(); assertEquals(2, containersRequestedAny); containersRequestedAny = remoteRequestsTable.get(priority1, - ResourceRequest.ANY, ExecutionType.GUARANTEED, capability) + ResourceRequest.ANY, 
ExecutionType.GUARANTEED, profileCapability) .remoteRequest.getNumContainers(); assertEquals(1, containersRequestedAny); List> matches = @@ -884,7 +891,7 @@ public void testAMRMClientWithSaslEncryption() throws Exception { teardown(); conf = new YarnConfiguration(); conf.set(CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION, "privacy"); - createClusterAndStartApplication(); + createClusterAndStartApplication(conf); initAMRMClientAndTest(false); } @@ -1183,9 +1190,11 @@ public void testAMRMClientWithContainerPromotion() true, null, ExecutionTypeRequest .newInstance(ExecutionType.OPPORTUNISTIC, true))); + ProfileCapability profileCapability = + ProfileCapability.newInstance(capability); int oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, - ExecutionType.OPPORTUNISTIC, capability).remoteRequest + ExecutionType.OPPORTUNISTIC, profileCapability).remoteRequest .getNumContainers(); assertEquals(1, oppContainersRequestedAny); @@ -1322,9 +1331,11 @@ public void testAMRMClientWithContainerDemotion() true, null, ExecutionTypeRequest .newInstance(ExecutionType.GUARANTEED, true))); + ProfileCapability profileCapability = + ProfileCapability.newInstance(capability); int oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, - ExecutionType.GUARANTEED, capability).remoteRequest + ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); assertEquals(1, oppContainersRequestedAny); @@ -1702,14 +1713,16 @@ private void assertNumContainers(AMRMClientImpl amClient, int expAsks, int expRelease) { RemoteRequestsTable remoteRequestsTable = amClient.getTable(allocationReqId); + ProfileCapability profileCapability = + ProfileCapability.newInstance(capability); int containersRequestedNode = remoteRequestsTable.get(priority, - node, ExecutionType.GUARANTEED, capability).remoteRequest + node, ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); int containersRequestedRack = remoteRequestsTable.get(priority, - rack, ExecutionType.GUARANTEED, capability).remoteRequest + rack, ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); int containersRequestedAny = remoteRequestsTable.get(priority, - ResourceRequest.ANY, ExecutionType.GUARANTEED, capability) + ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability) .remoteRequest.getNumContainers(); assertEquals(expNode, containersRequestedNode); @@ -1907,4 +1920,106 @@ public ApplicationMasterProtocol run() { } return result; } + + @Test(timeout = 60000) + public void testGetMatchingFitWithProfiles() throws Exception { + teardown(); + conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true); + createClusterAndStartApplication(conf); + AMRMClient amClient = null; + try { + // start am rm client + amClient = AMRMClient.createAMRMClient(); + amClient.init(conf); + amClient.start(); + amClient.registerApplicationMaster("Host", 10000, ""); + + ProfileCapability capability1 = ProfileCapability.newInstance("minimum"); + ProfileCapability capability2 = ProfileCapability.newInstance("default"); + ProfileCapability capability3 = ProfileCapability.newInstance("maximum"); + ProfileCapability capability4 = ProfileCapability + .newInstance("minimum", Resource.newInstance(2048, 1)); + ProfileCapability capability5 = ProfileCapability.newInstance("default"); + ProfileCapability capability6 = ProfileCapability + .newInstance("default", Resource.newInstance(2048, 1)); + // http has the same capabilities as default + 
ProfileCapability capability7 = ProfileCapability.newInstance("http"); + + ContainerRequest storedContainer1 = + new ContainerRequest(capability1, nodes, racks, priority); + ContainerRequest storedContainer2 = + new ContainerRequest(capability2, nodes, racks, priority); + ContainerRequest storedContainer3 = + new ContainerRequest(capability3, nodes, racks, priority); + ContainerRequest storedContainer4 = + new ContainerRequest(capability4, nodes, racks, priority); + ContainerRequest storedContainer5 = + new ContainerRequest(capability5, nodes, racks, priority2); + ContainerRequest storedContainer6 = + new ContainerRequest(capability6, nodes, racks, priority); + ContainerRequest storedContainer7 = + new ContainerRequest(capability7, nodes, racks, priority); + + + amClient.addContainerRequest(storedContainer1); + amClient.addContainerRequest(storedContainer2); + amClient.addContainerRequest(storedContainer3); + amClient.addContainerRequest(storedContainer4); + amClient.addContainerRequest(storedContainer5); + amClient.addContainerRequest(storedContainer6); + amClient.addContainerRequest(storedContainer7); + + // test matching of containers + List> matches; + ContainerRequest storedRequest; + // exact match + ProfileCapability testCapability1 = + ProfileCapability.newInstance("minimum"); + matches = amClient + .getMatchingRequests(priority, node, ExecutionType.GUARANTEED, + testCapability1); + verifyMatches(matches, 1); + storedRequest = matches.get(0).iterator().next(); + assertEquals(storedContainer1, storedRequest); + amClient.removeContainerRequest(storedContainer1); + + // exact matching with order maintained + // we should get back 3 matches - default + http because they have the + // same capability + ProfileCapability testCapability2 = + ProfileCapability.newInstance("default"); + matches = amClient + .getMatchingRequests(priority, node, ExecutionType.GUARANTEED, + testCapability2); + verifyMatches(matches, 2); + // must be returned in the order they were made + int i = 0; + for (ContainerRequest storedRequest1 : matches.get(0)) { + switch(i) { + case 0: + assertEquals(storedContainer2, storedRequest1); + break; + case 1: + assertEquals(storedContainer7, storedRequest1); + break; + } + i++; + } + amClient.removeContainerRequest(storedContainer5); + + // matching with larger container. 
all requests returned + Resource testCapability3 = Resource.newInstance(8192, 8); + matches = amClient + .getMatchingRequests(priority, node, testCapability3); + assertEquals(3, matches.size()); + + Resource testCapability4 = Resource.newInstance(2048, 1); + matches = amClient.getMatchingRequests(priority, node, testCapability4); + assertEquals(1, matches.size()); + } finally { + if (amClient != null && amClient.getServiceState() == STATE.STARTED) { + amClient.stop(); + } + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java index 96035394ec7..c87123ad38a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientContainerRequest.java @@ -29,6 +29,7 @@ import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.client.api.AMRMClient; @@ -276,9 +277,10 @@ private void verifyResourceRequest( AMRMClientImpl client, ContainerRequest request, String location, boolean expectedRelaxLocality, ExecutionType executionType) { - ResourceRequest ask = client.getTable(0) - .get(request.getPriority(), location, executionType, - request.getCapability()).remoteRequest; + ProfileCapability profileCapability = ProfileCapability + .newInstance(request.getResourceProfile(), request.getCapability()); + ResourceRequest ask = client.getTable(0).get(request.getPriority(), + location, executionType, profileCapability).remoteRequest; assertEquals(location, ask.getResourceName()); assertEquals(1, ask.getNumContainers()); assertEquals(expectedRelaxLocality, ask.getRelaxLocality()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java index e180f6dc29a..00f5e03a9cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestDistributedScheduling.java @@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.Token; @@ -387,18 +388,21 @@ public void testAMRMClient() throws Exception { RemoteRequestsTable remoteRequestsTable = amClient.getTable(0); + ProfileCapability profileCapability = + ProfileCapability.newInstance(capability); + int containersRequestedNode = remoteRequestsTable.get(priority, - 
node, ExecutionType.GUARANTEED, capability).remoteRequest + node, ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); int containersRequestedRack = remoteRequestsTable.get(priority, - rack, ExecutionType.GUARANTEED, capability).remoteRequest + rack, ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); int containersRequestedAny = remoteRequestsTable.get(priority, - ResourceRequest.ANY, ExecutionType.GUARANTEED, capability) + ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability) .remoteRequest.getNumContainers(); int oppContainersRequestedAny = remoteRequestsTable.get(priority2, ResourceRequest.ANY, - ExecutionType.OPPORTUNISTIC, capability).remoteRequest + ExecutionType.OPPORTUNISTIC, profileCapability).remoteRequest .getNumContainers(); assertEquals(2, containersRequestedNode); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java index b23a923513c..016f1bc26ae 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java @@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationState; @@ -255,9 +256,11 @@ private Set allocateContainers( racks, priority)); } + ProfileCapability profileCapability = + ProfileCapability.newInstance(capability); int containersRequestedAny = rmClient.getTable(0) .get(priority, ResourceRequest.ANY, ExecutionType.GUARANTEED, - capability).remoteRequest.getNumContainers(); + profileCapability).remoteRequest.getNumContainers(); // RM should allocate container within 2 calls to allocate() int allocatedContainerCount = 0; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java index 305d18b6525..12c32fc7dac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestOpportunisticContainerAllocation.java @@ -42,6 +42,7 @@ import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.Token; @@ -99,6 +100,7 @@ public class TestOpportunisticContainerAllocation { private static final long AM_EXPIRE_MS = 4000; private static Resource capability; + private static ProfileCapability profileCapability; 
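As in the other client tests touched here, lookups into RemoteRequestsTable are now keyed by a ProfileCapability built from the raw Resource rather than by the Resource itself. A minimal sketch of the three ways these tests build that key, using only the factory methods already shown above:

    import org.apache.hadoop.yarn.api.records.ProfileCapability;
    import org.apache.hadoop.yarn.api.records.Resource;

    /** Illustrative: the ProfileCapability keys used by the updated tests. */
    final class ProfileCapabilityKeys {
      static void examples() {
        Resource capability = Resource.newInstance(512, 1);
        // 1. Wrap a plain Resource (no named profile).
        ProfileCapability fromResource = ProfileCapability.newInstance(capability);
        // 2. Refer to a named profile from resource-profiles.json.
        ProfileCapability fromProfile = ProfileCapability.newInstance("default");
        // 3. Named profile plus a Resource override.
        ProfileCapability fromBoth =
            ProfileCapability.newInstance("default", Resource.newInstance(2048, 1));
        // Any of these can then be passed to getTable(...).get(priority, location,
        // executionType, profileCapability) as the tests above do.
      }
    }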
private static Priority priority; private static Priority priority2; private static Priority priority3; @@ -151,6 +153,7 @@ public static void setup() throws Exception { priority3 = Priority.newInstance(3); priority4 = Priority.newInstance(4); capability = Resource.newInstance(512, 1); + profileCapability = ProfileCapability.newInstance(capability); node = nodeReports.get(0).getNodeId().getHost(); rack = nodeReports.get(0).getRackName(); @@ -273,7 +276,7 @@ public void testPromotionFromAcquired() throws YarnException, IOException { int oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, - ExecutionType.OPPORTUNISTIC, capability).remoteRequest + ExecutionType.OPPORTUNISTIC, profileCapability).remoteRequest .getNumContainers(); assertEquals(1, oppContainersRequestedAny); @@ -394,7 +397,7 @@ public void testDemotionFromAcquired() throws YarnException, IOException { new AMRMClient.ContainerRequest(capability, null, null, priority3)); int guarContainersRequestedAny = amClient.getTable(0).get(priority3, - ResourceRequest.ANY, ExecutionType.GUARANTEED, capability) + ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability) .remoteRequest.getNumContainers(); assertEquals(1, guarContainersRequestedAny); @@ -512,6 +515,7 @@ public void testMixedAllocationAndRelease() throws YarnException, assertEquals(0, amClient.ask.size()); assertEquals(0, amClient.release.size()); + amClient.addContainerRequest( new AMRMClient.ContainerRequest(capability, nodes, racks, priority)); amClient.addContainerRequest( @@ -532,17 +536,17 @@ public void testMixedAllocationAndRelease() throws YarnException, ExecutionType.OPPORTUNISTIC, true))); int containersRequestedNode = amClient.getTable(0).get(priority, - node, ExecutionType.GUARANTEED, capability).remoteRequest + node, ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); int containersRequestedRack = amClient.getTable(0).get(priority, - rack, ExecutionType.GUARANTEED, capability).remoteRequest + rack, ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); int containersRequestedAny = amClient.getTable(0).get(priority, - ResourceRequest.ANY, ExecutionType.GUARANTEED, capability) + ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability) .remoteRequest.getNumContainers(); int oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, - ExecutionType.OPPORTUNISTIC, capability).remoteRequest + ExecutionType.OPPORTUNISTIC, profileCapability).remoteRequest .getNumContainers(); assertEquals(4, containersRequestedNode); @@ -564,17 +568,17 @@ public void testMixedAllocationAndRelease() throws YarnException, ExecutionType.OPPORTUNISTIC, true))); containersRequestedNode = amClient.getTable(0).get(priority, - node, ExecutionType.GUARANTEED, capability).remoteRequest + node, ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); containersRequestedRack = amClient.getTable(0).get(priority, - rack, ExecutionType.GUARANTEED, capability).remoteRequest + rack, ExecutionType.GUARANTEED, profileCapability).remoteRequest .getNumContainers(); containersRequestedAny = amClient.getTable(0).get(priority, - ResourceRequest.ANY, ExecutionType.GUARANTEED, capability) + ResourceRequest.ANY, ExecutionType.GUARANTEED, profileCapability) .remoteRequest.getNumContainers(); oppContainersRequestedAny = amClient.getTable(0).get(priority2, ResourceRequest.ANY, - ExecutionType.OPPORTUNISTIC, capability).remoteRequest + ExecutionType.OPPORTUNISTIC, 
profileCapability).remoteRequest .getNumContainers(); assertEquals(2, containersRequestedNode); @@ -691,10 +695,9 @@ public void testOpportunisticAllocation() throws YarnException, IOException { ExecutionTypeRequest.newInstance( ExecutionType.OPPORTUNISTIC, true))); - int oppContainersRequestedAny = - amClient.getTable(0).get(priority3, ResourceRequest.ANY, - ExecutionType.OPPORTUNISTIC, capability).remoteRequest - .getNumContainers(); + int oppContainersRequestedAny = amClient.getTable(0) + .get(priority3, ResourceRequest.ANY, ExecutionType.OPPORTUNISTIC, + profileCapability).remoteRequest.getNumContainers(); assertEquals(2, oppContainersRequestedAny); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java index 41ef404be6b..cd0e4728421 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java @@ -48,6 +48,7 @@ import org.apache.hadoop.io.DataInputByteBuffer; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -121,6 +122,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystemTestUtil; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; @@ -131,14 +133,16 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; public class TestYarnClient { - @Test - public void test() { - // More to come later. 
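The placeholder test in TestYarnClient is replaced by a @Before fixture (next hunk) that resets static metrics state. Presumably this is needed because tests in this class start a ResourceManager per test, and the scheduler's QueueMetrics register themselves with the JVM-wide DefaultMetricsSystem; without clearing them and switching to mini-cluster mode, a second RM in the same JVM can trip over already-registered metrics sources. The pattern, as a stand-alone fixture sketch:

    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
    import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
    import org.junit.Before;

    /** Illustrative fixture for tests that start more than one RM in a JVM. */
    public abstract class ResetMetricsFixture {
      @Before
      public void resetMetrics() {
        QueueMetrics.clearQueueMetrics();              // drop cached queue metrics
        DefaultMetricsSystem.setMiniClusterMode(true); // avoid clashes when sources register again
      }
    }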
+ @Before + public void setup() { + QueueMetrics.clearQueueMetrics(); + DefaultMetricsSystem.setMiniClusterMode(true); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java index d58732e006a..fed74887d52 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java @@ -196,7 +196,7 @@ public void testUnknownApplicationId() throws Exception { "Unable to get ApplicationState")); } - @Test(timeout = 5000l) + @Test (timeout = 10000) public void testHelpMessage() throws Exception { Configuration conf = new YarnConfiguration(); YarnClient mockYarnClient = createMockYarnClient( @@ -207,79 +207,102 @@ public void testHelpMessage() throws Exception { int exitCode = dumper.run(new String[]{}); assertTrue(exitCode == -1); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintWriter pw = new PrintWriter(baos); pw.println("Retrieve logs for YARN applications."); pw.println("usage: yarn logs -applicationId [OPTIONS]"); pw.println(); pw.println("general options are:"); - pw.println(" -am Prints the AM Container logs for"); - pw.println(" this application. Specify"); - pw.println(" comma-separated value to get logs"); - pw.println(" for related AM Container. For"); - pw.println(" example, If we specify -am 1,2,"); - pw.println(" we will get the logs for the"); - pw.println(" first AM Container as well as the"); - pw.println(" second AM Container. To get logs"); - pw.println(" for all AM Containers, use -am"); - pw.println(" ALL. To get logs for the latest"); - pw.println(" AM Container, use -am -1. By"); - pw.println(" default, it will print all"); - pw.println(" available logs. Work with"); - pw.println(" -log_files to get only specific"); - pw.println(" logs."); - pw.println(" -appOwner AppOwner (assumed to be current"); - pw.println(" user if not specified)"); - pw.println(" -containerId ContainerId. By default, it will"); - pw.println(" print all available logs. Work"); - pw.println(" with -log_files to get only"); - pw.println(" specific logs. If specified, the"); - pw.println(" applicationId can be omitted"); - pw.println(" -help Displays help for all commands."); - pw.println(" -list_nodes Show the list of nodes that"); - pw.println(" successfully aggregated logs."); - pw.println(" This option can only be used with"); - pw.println(" finished applications."); - pw.println(" -log_files Specify comma-separated value to"); - pw.println(" get exact matched log files. Use"); - pw.println(" \"ALL\" or \"*\" to fetch all the log"); - pw.println(" files for the container."); - pw.println(" -log_files_pattern Specify comma-separated value to"); - pw.println(" get matched log files by using"); - pw.println(" java regex. Use \".*\" to fetch all"); - pw.println(" the log files for the container."); - pw.println(" -nodeAddress NodeAddress in the format"); - pw.println(" nodename:port"); - pw.println(" -out Local directory for storing"); - pw.println(" individual container logs. 
The"); - pw.println(" container logs will be stored"); - pw.println(" based on the node the container"); - pw.println(" ran on."); - pw.println(" -show_application_log_info Show the containerIds which"); - pw.println(" belong to the specific"); - pw.println(" Application. You can combine this"); - pw.println(" with --nodeAddress to get"); - pw.println(" containerIds for all the"); - pw.println(" containers on the specific"); - pw.println(" NodeManager."); - pw.println(" -show_container_log_info Show the container log metadata,"); - pw.println(" including log-file names, the"); - pw.println(" size of the log files. You can"); - pw.println(" combine this with --containerId"); - pw.println(" to get log metadata for the"); - pw.println(" specific container, or with"); - pw.println(" --nodeAddress to get log metadata"); - pw.println(" for all the containers on the"); - pw.println(" specific NodeManager."); - pw.println(" -size Prints the log file's first 'n'"); - pw.println(" bytes or the last 'n' bytes. Use"); - pw.println(" negative values as bytes to read"); - pw.println(" from the end and positive values"); - pw.println(" as bytes to read from the"); - pw.println(" beginning."); + pw.println(" -am Prints the AM Container logs"); + pw.println(" for this application."); + pw.println(" Specify comma-separated"); + pw.println(" value to get logs for"); + pw.println(" related AM Container. For"); + pw.println(" example, If we specify -am"); + pw.println(" 1,2, we will get the logs"); + pw.println(" for the first AM Container"); + pw.println(" as well as the second AM"); + pw.println(" Container. To get logs for"); + pw.println(" all AM Containers, use -am"); + pw.println(" ALL. To get logs for the"); + pw.println(" latest AM Container, use -am"); + pw.println(" -1. By default, it will"); + pw.println(" print all available logs."); + pw.println(" Work with -log_files to get"); + pw.println(" only specific logs."); + pw.println(" -appOwner AppOwner (assumed to be"); + pw.println(" current user if not"); + pw.println(" specified)"); + pw.println(" -client_max_retries Set max retry number for a"); + pw.println(" retry client to get the"); + pw.println(" container logs for the"); + pw.println(" running applications. Use a"); + pw.println(" negative value to make retry"); + pw.println(" forever. The default value"); + pw.println(" is 30."); + pw.println(" -client_retry_interval_ms Work with"); + pw.println(" --client_max_retries to"); + pw.println(" create a retry client. The"); + pw.println(" default value is 1000."); + pw.println(" -containerId ContainerId. By default, it"); + pw.println(" will print all available"); + pw.println(" logs. Work with -log_files"); + pw.println(" to get only specific logs."); + pw.println(" If specified, the"); + pw.println(" applicationId can be omitted"); + pw.println(" -help Displays help for all"); + pw.println(" commands."); + pw.println(" -list_nodes Show the list of nodes that"); + pw.println(" successfully aggregated"); + pw.println(" logs. This option can only"); + pw.println(" be used with finished"); + pw.println(" applications."); + pw.println(" -log_files Specify comma-separated"); + pw.println(" value to get exact matched"); + pw.println(" log files. 
Use \"ALL\" or \"*\""); + pw.println(" to fetch all the log files"); + pw.println(" for the container."); + pw.println(" -log_files_pattern Specify comma-separated"); + pw.println(" value to get matched log"); + pw.println(" files by using java regex."); + pw.println(" Use \".*\" to fetch all the"); + pw.println(" log files for the container."); + pw.println(" -nodeAddress NodeAddress in the format"); + pw.println(" nodename:port"); + pw.println(" -out Local directory for storing"); + pw.println(" individual container logs."); + pw.println(" The container logs will be"); + pw.println(" stored based on the node the"); + pw.println(" container ran on."); + pw.println(" -show_application_log_info Show the containerIds which"); + pw.println(" belong to the specific"); + pw.println(" Application. You can combine"); + pw.println(" this with --nodeAddress to"); + pw.println(" get containerIds for all the"); + pw.println(" containers on the specific"); + pw.println(" NodeManager."); + pw.println(" -show_container_log_info Show the container log"); + pw.println(" metadata, including log-file"); + pw.println(" names, the size of the log"); + pw.println(" files. You can combine this"); + pw.println(" with --containerId to get"); + pw.println(" log metadata for the"); + pw.println(" specific container, or with"); + pw.println(" --nodeAddress to get log"); + pw.println(" metadata for all the"); + pw.println(" containers on the specific"); + pw.println(" NodeManager."); + pw.println(" -size Prints the log file's first"); + pw.println(" 'n' bytes or the last 'n'"); + pw.println(" bytes. Use negative values"); + pw.println(" as bytes to read from the"); + pw.println(" end and positive values as"); + pw.println(" bytes to read from the"); + pw.println(" beginning."); pw.close(); String appReportStr = baos.toString("UTF-8"); - Assert.assertEquals(appReportStr, sysOutStream.toString()); + Assert.assertTrue(sysOutStream.toString().contains(appReportStr)); } @Test (timeout = 15000) @@ -609,6 +632,56 @@ public ContainerReport getContainerReport(String containerIdStr) fs.delete(new Path(rootLogDir), true); } + @Test + public void testCheckRetryCount() throws Exception { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + + NodeId nodeId = NodeId.newInstance("localhost", 1234); + ApplicationId appId = ApplicationId.newInstance(0, 1); + ApplicationAttemptId appAttemptId = ApplicationAttemptId + .newInstance(appId, 1); + + // Create a mock ApplicationAttempt Report + ApplicationAttemptReport mockAttemptReport = mock( + ApplicationAttemptReport.class); + doReturn(appAttemptId).when(mockAttemptReport).getApplicationAttemptId(); + List attemptReports = Arrays.asList( + mockAttemptReport); + + // Create one mock containerReport + ContainerId containerId1 = ContainerId.newContainerId(appAttemptId, 1); + ContainerReport mockContainerReport1 = mock(ContainerReport.class); + doReturn(containerId1).when(mockContainerReport1).getContainerId(); + doReturn(nodeId).when(mockContainerReport1).getAssignedNode(); + doReturn("http://localhost:2345").when(mockContainerReport1) + .getNodeHttpAddress(); + doReturn(ContainerState.RUNNING).when(mockContainerReport1) + .getContainerState(); + List containerReports = Arrays.asList( + mockContainerReport1); + // Mock the YarnClient, and it would report the previous created + // mockAttemptReport and previous two created mockContainerReports + YarnClient mockYarnClient = createMockYarnClient( + YarnApplicationState.RUNNING, ugi.getShortUserName(), true, + attemptReports, 
containerReports); + doReturn(mockContainerReport1).when(mockYarnClient).getContainerReport( + any(ContainerId.class)); + LogsCLI cli = new LogsCLIForTest(mockYarnClient); + cli.setConf(new YarnConfiguration()); + try { + cli.run(new String[] {"-containerId", + containerId1.toString(), "-client_max_retries", "5"}); + Assert.fail("Exception expected! " + + "NodeManager should be off to run this test. "); + } catch (RuntimeException ce) { + Assert.assertTrue( + "Handler exception for reason other than retry: " + ce.getMessage(), + ce.getMessage().contains("Connection retries limit exceeded")); + Assert.assertTrue("Retry filter didn't perform any retries! ", cli + .connectionRetry.getRetired()); + } + } + @Test (timeout = 5000) public void testGetRunningContainerLogs() throws Exception { UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java index 3c35b9cd313..84cfb0ad222 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java @@ -39,8 +39,10 @@ import java.util.Collections; import java.util.Date; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.regex.Pattern; @@ -48,6 +50,7 @@ import org.apache.commons.lang.time.DateFormatUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -68,6 +71,7 @@ import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.api.records.SignalContainerCommand; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; @@ -117,9 +121,18 @@ public void testGetApplicationReport() throws Exception { for (int i = 0; i < 2; ++i) { ApplicationCLI cli = createAndGetAppCLI(); ApplicationId applicationId = ApplicationId.newInstance(1234, 5); + Map resourceSecondsMap = new HashMap<>(); + Map preemptedResoureSecondsMap = new HashMap<>(); + resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 123456L); + resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 4567L); + preemptedResoureSecondsMap + .put(ResourceInformation.MEMORY_MB.getName(), 1111L); + preemptedResoureSecondsMap + .put(ResourceInformation.VCORES.getName(), 2222L); ApplicationResourceUsageReport usageReport = i == 0 ? 
null : - ApplicationResourceUsageReport.newInstance( - 2, 0, null, null, null, 123456, 4567, 0, 0, 1111, 2222); + ApplicationResourceUsageReport + .newInstance(2, 0, null, null, null, resourceSecondsMap, 0, 0, + preemptedResoureSecondsMap); ApplicationReport newApplicationReport = ApplicationReport.newInstance( applicationId, ApplicationAttemptId.newInstance(applicationId, 1), "user", "queue", "appname", "host", 124, null, @@ -2148,17 +2161,16 @@ public void testUpdateApplicationTimeout() throws Exception { ApplicationCLI cli = createAndGetAppCLI(); ApplicationId applicationId = ApplicationId.newInstance(1234, 6); - ApplicationReport appReport = ApplicationReport.newInstance(applicationId, - ApplicationAttemptId.newInstance(applicationId, 1), "user", "queue", - "appname", "host", 124, null, YarnApplicationState.RUNNING, - "diagnostics", "url", 0, 0, FinalApplicationStatus.UNDEFINED, null, - "N/A", 0.53789f, "YARN", null); - ApplicationTimeout timeout = ApplicationTimeout - .newInstance(ApplicationTimeoutType.LIFETIME, "N/A", -1); - appReport.setApplicationTimeouts( - Collections.singletonMap(timeout.getTimeoutType(), timeout)); - when(client.getApplicationReport(any(ApplicationId.class))) - .thenReturn(appReport); + UpdateApplicationTimeoutsResponse response = + mock(UpdateApplicationTimeoutsResponse.class); + String formatISO8601 = + Times.formatISO8601(System.currentTimeMillis() + 5 * 1000); + when(response.getApplicationTimeouts()).thenReturn(Collections + .singletonMap(ApplicationTimeoutType.LIFETIME, formatISO8601)); + + when(client + .updateApplicationTimeouts(any(UpdateApplicationTimeoutsRequest.class))) + .thenReturn(response); int result = cli.run(new String[] { "application", "-appId", applicationId.toString(), "-updateLifetime", "10" }); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/resource-profiles.json b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/resource-profiles.json new file mode 100644 index 00000000000..d0f3f7268e4 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/resource-profiles.json @@ -0,0 +1,18 @@ +{ + "minimum": { + "memory-mb" : 1024, + "vcores" : 1 + }, + "default" : { + "memory-mb" : 2048, + "vcores" : 2 + }, + "maximum" : { + "memory-mb": 4096, + "vcores" : 4 + }, + "http" : { + "memory-mb" : 2048, + "vcores" : 2 + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml index 024b3c9fb06..e46eedafe95 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml @@ -213,6 +213,14 @@ true + + + ${project.basedir}/src/test/resources + + + ${project.basedir}/src/test/resources/resource-types + + org.apache.rat diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java index cef03b9b052..73c49906c37 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java @@ -51,7 +51,8 @@ public synchronized InputStream getConfigurationInputStream( "Illegal argument! 
The parameter should not be null or empty"); } Path filePath; - if (YarnConfiguration.RM_CONFIGURATION_FILES.contains(name)) { + if (YarnConfiguration.RM_CONFIGURATION_FILES.contains(name) || + YarnConfiguration.NM_CONFIGURATION_FILES.contains(name)) { filePath = new Path(this.configDir, name); if (!fs.exists(filePath)) { LOG.info(filePath + " not found"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java index cfa194fb5b2..0cdbd1516d5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java @@ -39,7 +39,8 @@ public InputStream getConfigurationInputStream(Configuration bootstrapConf, if (name == null || name.isEmpty()) { throw new YarnException( "Illegal argument! The parameter should not be null or empty"); - } else if (YarnConfiguration.RM_CONFIGURATION_FILES.contains(name)) { + } else if (YarnConfiguration.RM_CONFIGURATION_FILES.contains(name) || + YarnConfiguration.NM_CONFIGURATION_FILES.contains(name)) { return bootstrapConf.getConfResourceAsInputStream(name); } return new FileInputStream(name); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java index ad7cb296080..fd5096a7b37 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ApplicationClientProtocolPBClientImpl.java @@ -89,6 +89,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FailApplicationAttemptRequestPBImpl; @@ -147,6 +153,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SignalContainerResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceProfilesRequestPBImpl; +import 
org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceProfilesResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetResourceProfileRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetResourceProfileResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.proto.YarnServiceProtos; @@ -619,4 +631,46 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( return null; } } + + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException { + YarnServiceProtos.GetAllResourceProfilesRequestProto requestProto = + ((GetAllResourceProfilesRequestPBImpl) request).getProto(); + try { + return new GetAllResourceProfilesResponsePBImpl( + proxy.getResourceProfiles(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + YarnServiceProtos.GetResourceProfileRequestProto requestProto = + ((GetResourceProfileRequestPBImpl) request).getProto(); + try { + return new GetResourceProfileResponsePBImpl( + proxy.getResourceProfile(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + YarnServiceProtos.GetAllResourceTypeInfoRequestProto requestProto = + ((GetAllResourceTypeInfoRequestPBImpl) request).getProto(); + try { + return new GetAllResourceTypeInfoResponsePBImpl( + proxy.getResourceTypeInfo(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java index 93ce6a343c5..423287e9105 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ApplicationClientProtocolPBServiceImpl.java @@ -58,6 +58,9 @@ import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SignalContainerResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenResponsePBImpl; import 
org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FailApplicationAttemptRequestPBImpl; @@ -116,6 +119,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.UpdateApplicationTimeoutsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceProfilesRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceProfilesResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetResourceProfileRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetResourceProfileResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.proto.YarnServiceProtos; import org.apache.hadoop.yarn.proto.YarnServiceProtos.FailApplicationAttemptRequestProto; @@ -169,6 +178,12 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.UpdateApplicationTimeoutsResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceProfilesResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceProfilesRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileResponseProto; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; @@ -631,4 +646,52 @@ public UpdateApplicationTimeoutsResponseProto updateApplicationTimeouts( throw new ServiceException(e); } } + + @Override + public GetAllResourceProfilesResponseProto getResourceProfiles( + RpcController controller, GetAllResourceProfilesRequestProto proto) + throws ServiceException { + GetAllResourceProfilesRequestPBImpl req = + new GetAllResourceProfilesRequestPBImpl(proto); + try { + GetAllResourceProfilesResponse resp = real.getResourceProfiles(req); + return ((GetAllResourceProfilesResponsePBImpl) resp).getProto(); + } catch (YarnException ye) { + throw new ServiceException(ye); + } catch (IOException ie) { + throw new ServiceException(ie); + } + } + + @Override + public GetResourceProfileResponseProto getResourceProfile( + RpcController controller, GetResourceProfileRequestProto proto) + throws ServiceException { + GetResourceProfileRequestPBImpl req = + new GetResourceProfileRequestPBImpl(proto); + try { + GetResourceProfileResponse resp = real.getResourceProfile(req); + return ((GetResourceProfileResponsePBImpl) resp).getProto(); + } catch (YarnException ye) { + throw new ServiceException(ye); + } catch (IOException ie) { + throw new ServiceException(ie); + } + } + + @Override + public GetAllResourceTypeInfoResponseProto getResourceTypeInfo( + RpcController controller, GetAllResourceTypeInfoRequestProto proto) + throws ServiceException { + GetAllResourceTypeInfoRequestPBImpl req = new GetAllResourceTypeInfoRequestPBImpl( + 
proto); + try { + GetAllResourceTypeInfoResponse resp = real.getResourceTypeInfo(req); + return ((GetAllResourceTypeInfoResponsePBImpl) resp).getProto(); + } catch (YarnException ye) { + throw new ServiceException(ye); + } catch (IOException ie) { + throw new ServiceException(ie); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceProfilesRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceProfilesRequestPBImpl.java new file mode 100644 index 00000000000..ba06251f7af --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceProfilesRequestPBImpl.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceProfilesRequestProto; + +/** + * Protobuf implementation class for GetAllResourceProfilesRequest. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class GetAllResourceProfilesRequestPBImpl + extends GetAllResourceProfilesRequest { + + private GetAllResourceProfilesRequestProto proto = + GetAllResourceProfilesRequestProto.getDefaultInstance(); + private GetAllResourceProfilesRequestProto.Builder builder = null; + + private boolean viaProto = false; + + public GetAllResourceProfilesRequestPBImpl() { + builder = GetAllResourceProfilesRequestProto.newBuilder(); + } + + public GetAllResourceProfilesRequestPBImpl( + GetAllResourceProfilesRequestProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetAllResourceProfilesRequestProto getProto() { + proto = viaProto ? 
proto : builder.build(); + viaProto = true; + return proto; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceProfilesResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceProfilesResponsePBImpl.java new file mode 100644 index 00000000000..eaa392f3703 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceProfilesResponsePBImpl.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProfilesProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProfileEntry; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceProfilesResponseProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceProfilesResponseProto; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Protobuf implementation class for the GetAllResourceProfilesResponse. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class GetAllResourceProfilesResponsePBImpl + extends GetAllResourceProfilesResponse { + + private GetAllResourceProfilesResponseProto proto = + GetAllResourceProfilesResponseProto.getDefaultInstance(); + private GetAllResourceProfilesResponseProto.Builder builder = null; + private boolean viaProto = false; + private Map profiles; + + public GetAllResourceProfilesResponsePBImpl() { + builder = GetAllResourceProfilesResponseProto.newBuilder(); + } + + public GetAllResourceProfilesResponsePBImpl( + GetAllResourceProfilesResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetAllResourceProfilesResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? 
proto : builder.build(); + viaProto = true; + return proto; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = GetAllResourceProfilesResponseProto.newBuilder(proto); + } + viaProto = false; + } + + private void mergeLocalToBuilder() { + if (profiles != null) { + addProfilesToProto(); + } + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void addProfilesToProto() { + maybeInitBuilder(); + builder.clearResourceProfiles(); + if (profiles == null) { + return; + } + ResourceProfilesProto.Builder profilesBuilder = + ResourceProfilesProto.newBuilder(); + for (Map.Entry entry : profiles.entrySet()) { + ResourceProfileEntry.Builder profileEntry = + ResourceProfileEntry.newBuilder(); + profileEntry.setName(entry.getKey()); + profileEntry.setResources(convertToProtoFormat(entry.getValue())); + profilesBuilder.addResourceProfilesMap(profileEntry); + } + builder.setResourceProfiles(profilesBuilder.build()); + } + + public void setResourceProfiles(Map resourceProfiles) { + initResourceProfiles(); + profiles.clear(); + profiles.putAll(resourceProfiles); + } + + public Map getResourceProfiles() { + initResourceProfiles(); + return profiles; + } + + private void initResourceProfiles() { + if (profiles != null) { + return; + } + profiles = new HashMap<>(); + GetAllResourceProfilesResponseProtoOrBuilder p = viaProto ? proto : builder; + List profilesList = + p.getResourceProfiles().getResourceProfilesMapList(); + for (ResourceProfileEntry entry : profilesList) { + profiles.put(entry.getName(), new ResourcePBImpl(entry.getResources())); + } + } + + private ResourceProto convertToProtoFormat(Resource res) { + ResourcePBImpl r = new ResourcePBImpl(); + r.setMemorySize(res.getMemorySize()); + r.setVirtualCores(res.getVirtualCores()); + return r.getProto(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoRequestPBImpl.java new file mode 100644 index 00000000000..b3f4692412e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoRequestPBImpl.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoRequestProto; + +/** + * Protobuf implementation class for GetAllResourceTypeInfoRequest. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class GetAllResourceTypeInfoRequestPBImpl + extends GetAllResourceTypeInfoRequest { + + private GetAllResourceTypeInfoRequestProto proto = + GetAllResourceTypeInfoRequestProto.getDefaultInstance(); + private GetAllResourceTypeInfoRequestProto.Builder builder = null; + + private boolean viaProto = false; + + public GetAllResourceTypeInfoRequestPBImpl() { + builder = GetAllResourceTypeInfoRequestProto.newBuilder(); + } + + public GetAllResourceTypeInfoRequestPBImpl( + GetAllResourceTypeInfoRequestProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetAllResourceTypeInfoRequestProto getProto() { + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoResponsePBImpl.java new file mode 100644 index 00000000000..28decebcabf --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetAllResourceTypeInfoResponsePBImpl.java @@ -0,0 +1,184 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourceTypeInfoPBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypeInfoProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceTypeInfoResponseProtoOrBuilder; + +import com.google.protobuf.TextFormat; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * Protobuf implementation class for the GetAllResourceTypeInfoResponse. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class GetAllResourceTypeInfoResponsePBImpl + extends + GetAllResourceTypeInfoResponse { + + private GetAllResourceTypeInfoResponseProto proto = GetAllResourceTypeInfoResponseProto + .getDefaultInstance(); + private GetAllResourceTypeInfoResponseProto.Builder builder = null; + private boolean viaProto = false; + + private List resourceTypeInfo; + + public GetAllResourceTypeInfoResponsePBImpl() { + builder = GetAllResourceTypeInfoResponseProto.newBuilder(); + } + + public GetAllResourceTypeInfoResponsePBImpl( + GetAllResourceTypeInfoResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetAllResourceTypeInfoResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public void setResourceTypeInfo(List resourceTypes) { + if (resourceTypeInfo == null) { + builder.clearResourceTypeInfo(); + } + this.resourceTypeInfo = resourceTypes; + } + + @Override + public List getResourceTypeInfo() { + initResourceTypeInfosList(); + return this.resourceTypeInfo; + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } + + private void mergeLocalToBuilder() { + if (this.resourceTypeInfo != null) { + addResourceTypeInfosToProto(); + } + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = GetAllResourceTypeInfoResponseProto.newBuilder(proto); + } + viaProto = false; + } + + // Once this is called. containerList will never be null - until a getProto + // is called. + private void initResourceTypeInfosList() { + if (this.resourceTypeInfo != null) { + return; + } + GetAllResourceTypeInfoResponseProtoOrBuilder p = viaProto ? 
proto : builder; + List list = p.getResourceTypeInfoList(); + resourceTypeInfo = new ArrayList(); + + for (ResourceTypeInfoProto a : list) { + resourceTypeInfo.add(convertFromProtoFormat(a)); + } + } + + private void addResourceTypeInfosToProto() { + maybeInitBuilder(); + builder.clearResourceTypeInfo(); + if (resourceTypeInfo == null) { + return; + } + Iterable iterable = new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + + Iterator iter = resourceTypeInfo.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public ResourceTypeInfoProto next() { + return convertToProtoFormat(iter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + + } + }; + + } + }; + builder.addAllResourceTypeInfo(iterable); + } + + private ResourceTypeInfoPBImpl convertFromProtoFormat( + ResourceTypeInfoProto p) { + return new ResourceTypeInfoPBImpl(p); + } + + private ResourceTypeInfoProto convertToProtoFormat(ResourceTypeInfo t) { + return ((ResourceTypeInfoPBImpl) t).getProto(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetResourceProfileRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetResourceProfileRequestPBImpl.java new file mode 100644 index 00000000000..89a680c9f9e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetResourceProfileRequestPBImpl.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileRequestProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileRequestProto; + +/** + * Protobuf implementation for the GetResourceProfileRequest class. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class GetResourceProfileRequestPBImpl extends GetResourceProfileRequest { + + private GetResourceProfileRequestProto proto = + GetResourceProfileRequestProto.getDefaultInstance(); + private GetResourceProfileRequestProto.Builder builder = null; + private boolean viaProto = false; + + private String profile; + + public GetResourceProfileRequestPBImpl() { + builder = GetResourceProfileRequestProto.newBuilder(); + } + + public GetResourceProfileRequestPBImpl(GetResourceProfileRequestProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetResourceProfileRequestProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public void setProfileName(String profileName) { + this.profile = profileName; + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void mergeLocalToBuilder() { + if (profile != null) { + builder.setProfile(profile); + } + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = GetResourceProfileRequestProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public String getProfileName() { + if (this.profile != null) { + return profile; + } + GetResourceProfileRequestProtoOrBuilder protoOrBuilder = + viaProto ? proto : builder; + if (protoOrBuilder.hasProfile()) { + profile = protoOrBuilder.getProfile(); + } + return profile; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetResourceProfileResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetResourceProfileResponsePBImpl.java new file mode 100644 index 00000000000..e08d077a786 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetResourceProfileResponsePBImpl.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileResponseProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileResponseProto; +import org.apache.hadoop.yarn.util.resource.Resources; + +/** + * Protobuf implementation for the GetResourceProfileResponse class. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class GetResourceProfileResponsePBImpl + extends GetResourceProfileResponse { + + private GetResourceProfileResponseProto proto = + GetResourceProfileResponseProto.getDefaultInstance(); + private GetResourceProfileResponseProto.Builder builder = null; + private boolean viaProto = false; + + private Resource resource; + + public GetResourceProfileResponsePBImpl() { + builder = GetResourceProfileResponseProto.newBuilder(); + } + + public GetResourceProfileResponsePBImpl( + GetResourceProfileResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public Resource getResource() { + if (resource != null) { + return resource; + } + GetResourceProfileResponseProtoOrBuilder p = viaProto ? proto : builder; + if (p.hasResources()) { + resource = Resource.newInstance(p.getResources().getMemory(), + p.getResources().getVirtualCores()); + } + return resource; + } + + public void setResource(Resource r) { + resource = Resources.clone(r); + } + + public GetResourceProfileResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? 
proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void mergeLocalToBuilder() { + if (resource != null) { + builder.setResources(convertToProtoFormat(resource)); + } + } + + private ResourceProto convertToProtoFormat(Resource res) { + ResourcePBImpl r = new ResourcePBImpl(); + r.setMemorySize(res.getMemorySize()); + r.setVirtualCores(res.getVirtualCores()); + return r.getProto(); + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = GetResourceProfileResponseProto.newBuilder(proto); + } + viaProto = false; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java index 1a70933a284..032bbc36224 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java @@ -33,6 +33,8 @@ import org.apache.hadoop.yarn.api.records.impl.pb.NMTokenPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProfilesProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProfileEntry; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationACLMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; @@ -59,6 +61,7 @@ public class RegisterApplicationMasterResponsePBImpl extends private List containersFromPreviousAttempts = null; private List nmTokens = null; private EnumSet schedulerResourceTypes = null; + private Map profiles = null; public RegisterApplicationMasterResponsePBImpl() { builder = RegisterApplicationMasterResponseProto.newBuilder(); @@ -123,6 +126,9 @@ private void mergeLocalToBuilder() { if(schedulerResourceTypes != null) { addSchedulerResourceTypes(); } + if (profiles != null) { + addResourceProfiles(); + } } @@ -433,6 +439,58 @@ public void setSchedulerResourceTypes(EnumSet types) { this.schedulerResourceTypes.addAll(types); } + private void addResourceProfiles() { + maybeInitBuilder(); + builder.clearResourceProfiles(); + if (profiles == null) { + return; + } + ResourceProfilesProto.Builder profilesBuilder = + ResourceProfilesProto.newBuilder(); + for (Map.Entry entry : profiles.entrySet()) { + ResourceProfileEntry.Builder entryBuilder = + ResourceProfileEntry.newBuilder(); + entryBuilder.setName(entry.getKey()); + entryBuilder.setResources(convertToProtoFormat(entry.getValue())); + profilesBuilder.addResourceProfilesMap(entryBuilder.build()); + } + builder.setResourceProfiles(profilesBuilder.build()); + } + + private void initResourceProfiles() { + if (this.profiles != null) { + return; + } + this.profiles = new HashMap<>(); + RegisterApplicationMasterResponseProtoOrBuilder p = + viaProto ? 
proto : builder; + + if (p.hasResourceProfiles()) { + ResourceProfilesProto profilesProto = p.getResourceProfiles(); + for (ResourceProfileEntry entry : profilesProto + .getResourceProfilesMapList()) { + this.profiles + .put(entry.getName(), convertFromProtoFormat(entry.getResources())); + } + } + } + + @Override + public Map getResourceProfiles() { + initResourceProfiles(); + return this.profiles; + } + + @Override + public void setResourceProfiles(Map profilesMap) { + if (profilesMap == null) { + return; + } + initResourceProfiles(); + this.profiles.clear(); + this.profiles.putAll(profilesMap); + } + private Resource convertFromProtoFormat(ResourceProto resource) { return new ResourcePBImpl(resource); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/UpdateApplicationTimeoutsResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/UpdateApplicationTimeoutsResponsePBImpl.java index 74f17155f40..0c94f976017 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/UpdateApplicationTimeoutsResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/UpdateApplicationTimeoutsResponsePBImpl.java @@ -18,10 +18,19 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse; +import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType; +import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; +import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationUpdateTimeoutMapProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.UpdateApplicationTimeoutsResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.UpdateApplicationTimeoutsResponseProtoOrBuilder; import com.google.protobuf.TextFormat; @@ -33,6 +42,7 @@ public class UpdateApplicationTimeoutsResponsePBImpl UpdateApplicationTimeoutsResponseProto.getDefaultInstance(); UpdateApplicationTimeoutsResponseProto.Builder builder = null; boolean viaProto = false; + private Map applicationTimeouts = null; public UpdateApplicationTimeoutsResponsePBImpl() { builder = UpdateApplicationTimeoutsResponseProto.newBuilder(); @@ -45,11 +55,34 @@ public UpdateApplicationTimeoutsResponsePBImpl( } public UpdateApplicationTimeoutsResponseProto getProto() { + mergeLocalToProto(); proto = viaProto ? 
proto : builder.build(); viaProto = true; return proto; } + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = UpdateApplicationTimeoutsResponseProto.newBuilder(proto); + } + viaProto = false; + } + + private void mergeLocalToBuilder() { + if (this.applicationTimeouts != null) { + addApplicationTimeouts(); + } + } + @Override public int hashCode() { return getProto().hashCode(); @@ -70,4 +103,79 @@ public boolean equals(Object other) { public String toString() { return TextFormat.shortDebugString(getProto()); } + + @Override + public Map getApplicationTimeouts() { + initApplicationTimeout(); + return this.applicationTimeouts; + } + + private void initApplicationTimeout() { + if (this.applicationTimeouts != null) { + return; + } + UpdateApplicationTimeoutsResponseProtoOrBuilder p = + viaProto ? proto : builder; + List lists = + p.getApplicationTimeoutsList(); + this.applicationTimeouts = + new HashMap(lists.size()); + for (ApplicationUpdateTimeoutMapProto timeoutProto : lists) { + this.applicationTimeouts.put( + ProtoUtils + .convertFromProtoFormat(timeoutProto.getApplicationTimeoutType()), + timeoutProto.getExpireTime()); + } + } + + @Override + public void setApplicationTimeouts( + Map appTimeouts) { + if (appTimeouts == null) { + return; + } + initApplicationTimeout(); + this.applicationTimeouts.clear(); + this.applicationTimeouts.putAll(appTimeouts); + } + + private void addApplicationTimeouts() { + maybeInitBuilder(); + builder.clearApplicationTimeouts(); + if (applicationTimeouts == null) { + return; + } + Iterable values = + new Iterable() { + + @Override + public Iterator iterator() { + return new Iterator() { + private Iterator iterator = + applicationTimeouts.keySet().iterator(); + + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + + @Override + public ApplicationUpdateTimeoutMapProto next() { + ApplicationTimeoutType key = iterator.next(); + return ApplicationUpdateTimeoutMapProto.newBuilder() + .setExpireTime(applicationTimeouts.get(key)) + .setApplicationTimeoutType( + ProtoUtils.convertToProtoFormat(key)) + .build(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + this.builder.addAllApplicationTimeouts(values); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java index 1c85e28dca8..14ede5dbf34 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java @@ -22,12 +22,15 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationResourceUsageReportProto; import 
org.apache.hadoop.yarn.proto.YarnProtos.ApplicationResourceUsageReportProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import com.google.protobuf.TextFormat; +import java.util.Map; + @Private @Unstable public class ApplicationResourceUsageReportPBImpl @@ -41,6 +44,9 @@ public class ApplicationResourceUsageReportPBImpl Resource reservedResources; Resource neededResources; + private Map resourceSecondsMap; + private Map preemptedResourceSecondsMap; + public ApplicationResourceUsageReportPBImpl() { builder = ApplicationResourceUsageReportProto.newBuilder(); } @@ -49,6 +55,8 @@ public ApplicationResourceUsageReportPBImpl( ApplicationResourceUsageReportProto proto) { this.proto = proto; viaProto = true; + getResourceSecondsMap(); + getPreemptedResourceSecondsMap(); } public synchronized ApplicationResourceUsageReportProto getProto() { @@ -89,6 +97,23 @@ private void mergeLocalToBuilder() { if (this.neededResources != null) { builder.setNeededResources(convertToProtoFormat(this.neededResources)); } + builder.clearApplicationResourceUsageMap(); + builder.clearApplicationPreemptedResourceUsageMap(); + + if (preemptedResourceSecondsMap != null && !preemptedResourceSecondsMap + .isEmpty()) { + builder.addAllApplicationPreemptedResourceUsageMap(ProtoUtils + .convertMapToStringLongMapProtoList(preemptedResourceSecondsMap)); + } + if (resourceSecondsMap != null && !resourceSecondsMap.isEmpty()) { + builder.addAllApplicationResourceUsageMap( + ProtoUtils.convertMapToStringLongMapProtoList(resourceSecondsMap)); + } + + builder.setMemorySeconds(this.getMemorySeconds()); + builder.setVcoreSeconds(this.getVcoreSeconds()); + builder.setPreemptedMemorySeconds(this.getPreemptedMemorySeconds()); + builder.setPreemptedVcoreSeconds(this.getPreemptedVcoreSeconds()); } private void mergeLocalToProto() { @@ -196,54 +221,64 @@ public synchronized void setNeededResources(Resource reserved_resources) { @Override public synchronized void setMemorySeconds(long memory_seconds) { - maybeInitBuilder(); - builder.setMemorySeconds(memory_seconds); + getResourceSecondsMap() + .put(ResourceInformation.MEMORY_MB.getName(), memory_seconds); } - + @Override public synchronized long getMemorySeconds() { - ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder; - return p.getMemorySeconds(); + Map tmp = getResourceSecondsMap(); + if (tmp.containsKey(ResourceInformation.MEMORY_MB.getName())) { + return tmp.get(ResourceInformation.MEMORY_MB.getName()); + } + return 0; } @Override public synchronized void setVcoreSeconds(long vcore_seconds) { - maybeInitBuilder(); - builder.setVcoreSeconds(vcore_seconds); + getResourceSecondsMap() + .put(ResourceInformation.VCORES.getName(), vcore_seconds); } @Override public synchronized long getVcoreSeconds() { - ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder; - return (p.getVcoreSeconds()); + Map tmp = getResourceSecondsMap(); + if (tmp.containsKey(ResourceInformation.VCORES.getName())) { + return tmp.get(ResourceInformation.VCORES.getName()); + } + return 0; } @Override public synchronized void setPreemptedMemorySeconds( long preemptedMemorySeconds) { - maybeInitBuilder(); - builder.setPreemptedMemorySeconds(preemptedMemorySeconds); + getPreemptedResourceSecondsMap() + .put(ResourceInformation.MEMORY_MB.getName(), preemptedMemorySeconds); } @Override public synchronized long getPreemptedMemorySeconds() { - ApplicationResourceUsageReportProtoOrBuilder p = - viaProto ? 
proto : builder; - return p.getPreemptedMemorySeconds(); + Map tmp = getPreemptedResourceSecondsMap(); + if (tmp.containsKey(ResourceInformation.MEMORY_MB.getName())) { + return tmp.get(ResourceInformation.MEMORY_MB.getName()); + } + return 0; } @Override public synchronized void setPreemptedVcoreSeconds( long vcoreSeconds) { - maybeInitBuilder(); - builder.setPreemptedVcoreSeconds(vcoreSeconds); + getPreemptedResourceSecondsMap() + .put(ResourceInformation.VCORES.getName(), vcoreSeconds); } @Override public synchronized long getPreemptedVcoreSeconds() { - ApplicationResourceUsageReportProtoOrBuilder p = - viaProto ? proto : builder; - return (p.getPreemptedVcoreSeconds()); + Map tmp = getPreemptedResourceSecondsMap(); + if (tmp.containsKey(ResourceInformation.VCORES.getName())) { + return tmp.get(ResourceInformation.VCORES.getName()); + } + return 0; } private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { @@ -277,4 +312,81 @@ public synchronized void setClusterUsagePercentage(float clusterUsagePerc) { maybeInitBuilder(); builder.setClusterUsagePercentage((clusterUsagePerc)); } + + @Override + public synchronized void setResourceSecondsMap( + Map resourceSecondsMap) { + this.resourceSecondsMap = resourceSecondsMap; + if (resourceSecondsMap == null) { + return; + } + if (!resourceSecondsMap + .containsKey(ResourceInformation.MEMORY_MB.getName())) { + this.setMemorySeconds(0L); + } + if (!resourceSecondsMap.containsKey(ResourceInformation.VCORES.getName())) { + this.setVcoreSeconds(0L); + } + } + + @Override + public synchronized Map getResourceSecondsMap() { + if (this.resourceSecondsMap != null) { + return this.resourceSecondsMap; + } + ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder; + this.resourceSecondsMap = ProtoUtils + .convertStringLongMapProtoListToMap( + p.getApplicationResourceUsageMapList()); + if (!this.resourceSecondsMap + .containsKey(ResourceInformation.MEMORY_MB.getName())) { + this.setMemorySeconds(p.getMemorySeconds()); + } + if (!this.resourceSecondsMap + .containsKey(ResourceInformation.VCORES.getName())) { + this.setVcoreSeconds(p.getVcoreSeconds()); + } + this.setMemorySeconds(p.getMemorySeconds()); + this.setVcoreSeconds(p.getVcoreSeconds()); + return this.resourceSecondsMap; + } + + @Override + public synchronized void setPreemptedResourceSecondsMap( + Map preemptedResourceSecondsMap) { + this.preemptedResourceSecondsMap = preemptedResourceSecondsMap; + if (preemptedResourceSecondsMap == null) { + return; + } + if (!preemptedResourceSecondsMap + .containsKey(ResourceInformation.MEMORY_MB.getName())) { + this.setPreemptedMemorySeconds(0L); + } + if (!preemptedResourceSecondsMap + .containsKey(ResourceInformation.VCORES.getName())) { + this.setPreemptedVcoreSeconds(0L); + } + } + + @Override + public synchronized Map getPreemptedResourceSecondsMap() { + if (this.preemptedResourceSecondsMap != null) { + return this.preemptedResourceSecondsMap; + } + ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? 
proto : builder; + this.preemptedResourceSecondsMap = ProtoUtils + .convertStringLongMapProtoListToMap( + p.getApplicationPreemptedResourceUsageMapList()); + if (!this.preemptedResourceSecondsMap + .containsKey(ResourceInformation.MEMORY_MB.getName())) { + this.setPreemptedMemorySeconds(p.getPreemptedMemorySeconds()); + } + if (!this.preemptedResourceSecondsMap + .containsKey(ResourceInformation.VCORES.getName())) { + this.setPreemptedVcoreSeconds(p.getPreemptedVcoreSeconds()); + } + this.setPreemptedMemorySeconds(p.getPreemptedMemorySeconds()); + this.setPreemptedVcoreSeconds(p.getPreemptedVcoreSeconds()); + return this.preemptedResourceSecondsMap; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProfileCapabilityPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProfileCapabilityPBImpl.java new file mode 100644 index 00000000000..8c161f8a0fc --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProfileCapabilityPBImpl.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.records.ProfileCapability; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.proto.YarnProtos; +import org.apache.hadoop.yarn.proto.YarnProtos.ProfileCapabilityProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnProtos.ProfileCapabilityProto; +import org.apache.hadoop.yarn.util.resource.Resources; + +/** + * Protobuf implementation for the ProfileCapability class. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class ProfileCapabilityPBImpl extends ProfileCapability { + + private ProfileCapabilityProto proto = + ProfileCapabilityProto.getDefaultInstance(); + private ProfileCapabilityProto.Builder builder; + + private boolean viaProto; + + private String profile; + private Resource profileCapabilityOverride; + + public ProfileCapabilityPBImpl() { + builder = ProfileCapabilityProto.newBuilder(); + } + + public ProfileCapabilityPBImpl(ProfileCapabilityProto proto) { + this.proto = proto; + viaProto = true; + } + + @Override + public String getProfileName() { + if (profile != null) { + return profile; + } + ProfileCapabilityProtoOrBuilder p = viaProto ? 
proto : builder; + if (p.hasProfile()) { + profile = p.getProfile(); + } + return profile; + } + + @Override + public Resource getProfileCapabilityOverride() { + if (profileCapabilityOverride != null) { + return profileCapabilityOverride; + } + ProfileCapabilityProtoOrBuilder p = viaProto ? proto : builder; + if (p.hasProfileCapabilityOverride()) { + profileCapabilityOverride = + Resources.clone(new ResourcePBImpl(p.getProfileCapabilityOverride())); + } + return profileCapabilityOverride; + } + + @Override + public void setProfileName(String profileName) { + this.profile = profileName; + } + + @Override + public void setProfileCapabilityOverride(Resource r) { + this.profileCapabilityOverride = r; + } + + public ProfileCapabilityProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void mergeLocalToBuilder() { + if (profile != null) { + builder.setProfile(profile); + } + if (profileCapabilityOverride != null) { + builder.setProfileCapabilityOverride( + convertToProtoFormat(profileCapabilityOverride)); + } + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = ProfileCapabilityProto.newBuilder(proto); + } + viaProto = false; + } + + private YarnProtos.ResourceProto convertToProtoFormat(Resource res) { + ResourcePBImpl r = new ResourcePBImpl(); + r.setMemorySize(res.getMemorySize()); + r.setVirtualCores(res.getVirtualCores()); + return r.getProto(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java index fa9c43011b3..158c2ae72da 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java @@ -19,10 +19,15 @@ package org.apache.hadoop.yarn.api.records.impl.pb; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.protocolrecords.ApplicationsRequestScope; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; import org.apache.hadoop.yarn.api.records.AMCommand; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; @@ -44,6 +49,7 @@ import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.ReservationRequestInterpreter; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.UpdateContainerError; import org.apache.hadoop.yarn.api.records.UpdateContainerRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; @@ -71,6 +77,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ContainerTypeProto; import org.apache.hadoop.yarn.proto.YarnProtos.ExecutionTypeProto; import 
org.apache.hadoop.yarn.proto.YarnProtos.ExecutionTypeRequestProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypesProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos; import org.apache.hadoop.yarn.proto.YarnServiceProtos.ContainerUpdateTypeProto; import org.apache.hadoop.yarn.server.api.ContainerType; @@ -433,6 +440,45 @@ public static UpdateContainerErrorPBImpl convertFromProtoFormat( convertToProtoFormat(UpdateContainerError t) { return ((UpdateContainerErrorPBImpl) t).getProto(); } + + /* + * ResourceTypes + */ + public static ResourceTypesProto converToProtoFormat(ResourceTypes e) { + return ResourceTypesProto.valueOf(e.name()); + } + + public static ResourceTypes convertFromProtoFormat(ResourceTypesProto e) { + return ResourceTypes.valueOf(e.name()); + } + + public static Map convertStringLongMapProtoListToMap( + List pList) { + Resource tmp = Resource.newInstance(0, 0); + Map ret = new HashMap<>(); + for (ResourceInformation entry : tmp.getResources()) { + ret.put(entry.getName(), 0L); + } + if (pList != null) { + for (YarnProtos.StringLongMapProto p : pList) { + ret.put(p.getKey(), p.getValue()); + } + } + return ret; + } + + public static List convertMapToStringLongMapProtoList( + Map map) { + List ret = new ArrayList<>(); + for (Map.Entry entry : map.entrySet()) { + YarnProtos.StringLongMapProto.Builder tmp = + YarnProtos.StringLongMapProto.newBuilder(); + tmp.setKey(entry.getKey()); + tmp.setValue(entry.getValue()); + ret.add(tmp.build()); + } + return ret; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java index 34109bec0f6..92beec7bfa3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourcePBImpl.java @@ -18,16 +18,30 @@ package org.apache.hadoop.yarn.api.records.impl.pb; - +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceInformationProto; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; + +import java.util.Map; + @Private @Unstable public class ResourcePBImpl extends Resource { + + private static final Log LOG = LogFactory.getLog(ResourcePBImpl.class); + ResourceProto proto = ResourceProto.getDefaultInstance(); ResourceProto.Builder builder = null; boolean viaProto = false; @@ -47,14 +61,17 @@ static ResourceProto getProto(Resource r) { public ResourcePBImpl() { builder = ResourceProto.newBuilder(); + initResources(); } public ResourcePBImpl(ResourceProto proto) { this.proto = proto; viaProto = 
true; + initResources(); } - + public ResourceProto getProto() { + mergeLocalToProto(); proto = viaProto ? proto : builder.build(); viaProto = true; return proto; @@ -70,13 +87,19 @@ private void maybeInitBuilder() { @Override @SuppressWarnings("deprecation") public int getMemory() { - return (int) getMemorySize(); + return (int) this.getMemorySize(); } @Override public long getMemorySize() { - ResourceProtoOrBuilder p = viaProto ? proto : builder; - return p.getMemory(); + // memory should always be present + ResourceInformation ri = resources[MEMORY_INDEX]; + + if (ri.getUnits().isEmpty()) { + return ri.getValue(); + } + return UnitsConversionUtil.convert(ri.getUnits(), + ResourceInformation.MEMORY_MB.getUnits(), ri.getValue()); } @Override @@ -88,18 +111,129 @@ public void setMemory(int memory) { @Override public void setMemorySize(long memory) { maybeInitBuilder(); - builder.setMemory(memory); + getResourceInformation(ResourceInformation.MEMORY_URI).setValue(memory); } @Override public int getVirtualCores() { - ResourceProtoOrBuilder p = viaProto ? proto : builder; - return p.getVirtualCores(); + // vcores should always be present + return (int) resources[VCORES_INDEX].getValue(); } @Override public void setVirtualCores(int vCores) { maybeInitBuilder(); - builder.setVirtualCores(vCores); + getResourceInformation(ResourceInformation.VCORES_URI).setValue(vCores); + } + + private void initResources() { + if (this.resources != null) { + return; + } + ResourceProtoOrBuilder p = viaProto ? proto : builder; + initResourcesMap(); + Map indexMap = ResourceUtils.getResourceTypeIndex(); + for (ResourceInformationProto entry : p.getResourceValueMapList()) { + ResourceTypes type = + entry.hasType() ? ProtoUtils.convertFromProtoFormat(entry.getType()) : + ResourceTypes.COUNTABLE; + + // When unit not specified in proto, use the default unit. + String units = + entry.hasUnits() ? entry.getUnits() : ResourceUtils.getDefaultUnit( + entry.getKey()); + long value = entry.hasValue() ? 
entry.getValue() : 0L; + ResourceInformation ri = ResourceInformation + .newInstance(entry.getKey(), units, value, type, 0L, Long.MAX_VALUE); + Integer index = indexMap.get(entry.getKey()); + if (index == null) { + LOG.warn("Got unknown resource type: " + ri.getName() + "; skipping"); + } else { + resources[index].setResourceType(ri.getResourceType()); + resources[index].setUnits(ri.getUnits()); + resources[index].setValue(value); + } + } + this.setMemorySize(p.getMemory()); + this.setVirtualCores(p.getVirtualCores()); + } + + @Override + public void setResourceInformation(String resource, + ResourceInformation resourceInformation) { + maybeInitBuilder(); + if (resource == null || resourceInformation == null) { + throw new IllegalArgumentException( + "resource and/or resourceInformation cannot be null"); + } + if (!resource.equals(resourceInformation.getName())) { + resourceInformation.setName(resource); + } + ResourceInformation storedResourceInfo = getResourceInformation(resource); + ResourceInformation.copy(resourceInformation, storedResourceInfo); + } + + @Override + public void setResourceValue(String resource, long value) + throws ResourceNotFoundException { + maybeInitBuilder(); + if (resource == null) { + throw new IllegalArgumentException("resource type object cannot be null"); + } + getResourceInformation(resource).setValue(value); + } + + @Override + public ResourceInformation getResourceInformation(String resource) + throws ResourceNotFoundException { + return super.getResourceInformation(resource); + } + + @Override + public long getResourceValue(String resource) + throws ResourceNotFoundException { + return super.getResourceValue(resource); + } + + private void initResourcesMap() { + if (resources == null) { + ResourceInformation[] types = ResourceUtils.getResourceTypesArray(); + if (types == null) { + throw new YarnRuntimeException( + "Got null return value from ResourceUtils.getResourceTypes()"); + } + + resources = new ResourceInformation[types.length]; + for (ResourceInformation entry : types) { + int index = ResourceUtils.getResourceTypeIndex().get(entry.getName()); + resources[index] = ResourceInformation.newInstance(entry); + } + } + } + + synchronized private void mergeLocalToBuilder() { + builder.clearResourceValueMap(); + if(resources != null && resources.length != 0) { + for (ResourceInformation resInfo : resources) { + ResourceInformationProto.Builder e = ResourceInformationProto + .newBuilder(); + e.setKey(resInfo.getName()); + e.setUnits(resInfo.getUnits()); + e.setType(ProtoUtils.converToProtoFormat(resInfo.getResourceType())); + e.setValue(resInfo.getValue()); + builder.addResourceValueMap(e); + } + } + builder.setMemory(this.getMemorySize()); + builder.setVirtualCores(this.getVirtualCores()); + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java index eba539599a8..3c2964595cf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceRequestPBImpl.java @@ -23,8 
+23,10 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.proto.YarnProtos.ProfileCapabilityProto; import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceRequestProto; @@ -40,6 +42,7 @@ public class ResourceRequestPBImpl extends ResourceRequest { private Priority priority = null; private Resource capability = null; private ExecutionTypeRequest executionTypeRequest = null; + private ProfileCapability profile = null; public ResourceRequestPBImpl() { @@ -52,7 +55,7 @@ public ResourceRequestPBImpl(ResourceRequestProto proto) { } public ResourceRequestProto getProto() { - mergeLocalToProto(); + mergeLocalToProto(); proto = viaProto ? proto : builder.build(); viaProto = true; return proto; @@ -69,6 +72,9 @@ private void mergeLocalToBuilder() { builder.setExecutionTypeRequest( ProtoUtils.convertToProtoFormat(this.executionTypeRequest)); } + if (this.profile != null) { + builder.setProfile(converToProtoFormat(this.profile)); + } } private void mergeLocalToProto() { @@ -229,7 +235,8 @@ public String toString() { + ", Location: " + getResourceName() + ", Relax Locality: " + getRelaxLocality() + ", Execution Type Request: " + getExecutionTypeRequest() - + ", Node Label Expression: " + getNodeLabelExpression() + "}"; + + ", Node Label Expression: " + getNodeLabelExpression() + + ", Resource Profile: " + getProfileCapability() + "}"; } @Override @@ -250,4 +257,34 @@ public void setNodeLabelExpression(String nodeLabelExpression) { } builder.setNodeLabelExpression(nodeLabelExpression); } + + @Override + public void setProfileCapability(ProfileCapability profileCapability) { + maybeInitBuilder(); + if (profile == null) { + builder.clearProfile(); + } + this.profile = profileCapability; + } + + @Override + public ProfileCapability getProfileCapability() { + if (profile != null) { + return profile; + } + ResourceRequestProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasProfile()) { + return null; + } + return new ProfileCapabilityPBImpl(p.getProfile()); + } + + private ProfileCapabilityProto converToProtoFormat( + ProfileCapability profileCapability) { + ProfileCapabilityPBImpl tmp = new ProfileCapabilityPBImpl(); + tmp.setProfileName(profileCapability.getProfileName()); + tmp.setProfileCapabilityOverride( + profileCapability.getProfileCapabilityOverride()); + return tmp.getProto(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceTypeInfoPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceTypeInfoPBImpl.java new file mode 100644 index 00000000000..17230e7dfa5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceTypeInfoPBImpl.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records.impl.pb; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.proto.YarnProtos; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypeInfoProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceTypesProto; + +/** + * {@code ResourceTypeInfoPBImpl} which implements the + * {@link ResourceTypeInfo} class which represents different resource types + * supported in YARN. + */ +@Private +@Unstable +public class ResourceTypeInfoPBImpl extends ResourceTypeInfo { + + ResourceTypeInfoProto proto = ResourceTypeInfoProto.getDefaultInstance(); + ResourceTypeInfoProto.Builder builder = null; + boolean viaProto = false; + + private String name = null; + private String defaultUnit = null; + private ResourceTypes resourceTypes = null; + + public ResourceTypeInfoPBImpl() { + builder = ResourceTypeInfoProto.newBuilder(); + } + + public ResourceTypeInfoPBImpl(ResourceTypeInfoProto proto) { + this.proto = proto; + viaProto = true; + } + + public ResourceTypeInfoProto getProto() { + mergeLocalToProto(); + return proto; + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void mergeLocalToBuilder() { + if (this.name != null) { + builder.setName(this.name); + } + if (this.defaultUnit != null) { + builder.setUnits(this.defaultUnit); + } + if (this.resourceTypes != null) { + builder.setType(convertToProtoFormat(this.resourceTypes)); + } + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = YarnProtos.ResourceTypeInfoProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public String getName() { + if (this.name != null) { + return this.name; + } + + YarnProtos.ResourceTypeInfoProtoOrBuilder p = viaProto ? proto : builder; + return p.getName(); + } + + @Override + public void setName(String rName) { + maybeInitBuilder(); + if (rName == null) { + builder.clearName(); + } + this.name = rName; + } + + @Override + public String getDefaultUnit() { + if (this.defaultUnit != null) { + return this.defaultUnit; + } + + YarnProtos.ResourceTypeInfoProtoOrBuilder p = viaProto ? proto : builder; + return p.getUnits(); + } + + @Override + public void setDefaultUnit(String rUnits) { + maybeInitBuilder(); + if (rUnits == null) { + builder.clearUnits(); + } + this.defaultUnit = rUnits; + } + + @Override + public ResourceTypes getResourceType() { + if (this.resourceTypes != null) { + return this.resourceTypes; + } + + YarnProtos.ResourceTypeInfoProtoOrBuilder p = viaProto ? 
proto : builder; + return convertFromProtoFormat(p.getType()); + } + + @Override + public void setResourceType(ResourceTypes type) { + maybeInitBuilder(); + if (type == null) { + builder.clearType(); + } + this.resourceTypes = type; + } + + public static ResourceTypesProto convertToProtoFormat(ResourceTypes e) { + return ResourceTypesProto.valueOf(e.name()); + } + + public static ResourceTypes convertFromProtoFormat(ResourceTypesProto e) { + return ResourceTypes.valueOf(e.name()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/PerContainerLogFileInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/ContainerLogFileInfo.java similarity index 87% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/PerContainerLogFileInfo.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/ContainerLogFileInfo.java index 867815fd6ca..b461ebbdf24 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/PerContainerLogFileInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/ContainerLogFileInfo.java @@ -19,7 +19,7 @@ package org.apache.hadoop.yarn.logaggregation; /** - * PerContainerLogFileInfo represents the meta data for a container log file, + * ContainerLogFileInfo represents the meta data for a container log file, * which includes: *
<ul>
 *   <li>The filename of the container log.</li>
@@ -28,15 +28,15 @@
 * </ul>
* */ -public class PerContainerLogFileInfo { +public class ContainerLogFileInfo { private String fileName; private String fileSize; private String lastModifiedTime; //JAXB needs this - public PerContainerLogFileInfo() {} + public ContainerLogFileInfo() {} - public PerContainerLogFileInfo(String fileName, String fileSize, + public ContainerLogFileInfo(String fileName, String fileSize, String lastModifiedTime) { this.setFileName(fileName); this.setFileSize(fileSize); @@ -83,10 +83,10 @@ public boolean equals(Object otherObj) { if (otherObj == this) { return true; } - if (!(otherObj instanceof PerContainerLogFileInfo)) { + if (!(otherObj instanceof ContainerLogFileInfo)) { return false; } - PerContainerLogFileInfo other = (PerContainerLogFileInfo)otherObj; + ContainerLogFileInfo other = (ContainerLogFileInfo)otherObj; return other.fileName.equals(fileName) && other.fileSize.equals(fileSize) && other.lastModifiedTime.equals(lastModifiedTime); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/ContainerLogMeta.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/ContainerLogMeta.java index 26a620e8c99..4c6b0de16fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/ContainerLogMeta.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/ContainerLogMeta.java @@ -26,14 +26,14 @@ *
<ul>
 *   <li>The Container Id.</li>
 *   <li>The NodeManager Id.</li>
- *   <li>A list of {@link PerContainerLogFileInfo}.</li>
+ *   <li>A list of {@link ContainerLogFileInfo}.</li>
 * </ul>
* */ public class ContainerLogMeta { private String containerId; private String nodeId; - private List logMeta; + private List logMeta; public ContainerLogMeta(String containerId, String nodeId) { this.containerId = containerId; @@ -51,11 +51,11 @@ public String getContainerId() { public void addLogMeta(String fileName, String fileSize, String lastModificationTime) { - logMeta.add(new PerContainerLogFileInfo(fileName, fileSize, + logMeta.add(new ContainerLogFileInfo(fileName, fileSize, lastModificationTime)); } - public List getContainerLogMeta() { + public List getContainerLogMeta() { return this.logMeta; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogAggregationUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogAggregationUtils.java index 6d04c29406a..edf2cf3fbcf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogAggregationUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogAggregationUtils.java @@ -30,6 +30,9 @@ import com.google.common.annotations.VisibleForTesting; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; @Private public class LogAggregationUtils { @@ -195,6 +198,30 @@ public static RemoteIterator getRemoteNodeFileDir( return nodeFiles; } + /** + * Get all available log files under remote app log directory. + * @param conf the configuration + * @param appId the applicationId + * @param appOwner the application owner + * @param remoteRootLogDir the remote root log directory + * @param suffix the log directory suffix + * @return the list of available log files + * @throws IOException if there is no log file available + */ + public static List getRemoteNodeFileList( + Configuration conf, ApplicationId appId, String appOwner, + org.apache.hadoop.fs.Path remoteRootLogDir, String suffix) + throws IOException { + Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner, + remoteRootLogDir, suffix); + List nodeFiles = new ArrayList<>(); + Path qualifiedLogDir = + FileContext.getFileContext(conf).makeQualified(remoteAppLogDir); + nodeFiles.addAll(Arrays.asList(FileContext.getFileContext( + qualifiedLogDir.toUri(), conf).util().listStatus(remoteAppLogDir))); + return nodeFiles; + } + /** * Get all available log files under remote app log directory. 
* @param conf the configuration diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java index 0068eaeeb9a..97b78ec7c21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java @@ -22,8 +22,6 @@ import java.io.IOException; import java.io.PrintStream; import java.nio.file.AccessDeniedException; -import java.nio.file.Files; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -229,7 +227,7 @@ public int printAContainerLogMetadata(ContainerLogsRequest options, out.printf(PER_LOG_FILE_INFO_PATTERN, "LogFile", "LogLength", "LastModificationTime", "LogAggregationType"); out.println(StringUtils.repeat("=", containerString.length() * 2)); - for (PerContainerLogFileInfo logMeta : containerLogMeta + for (ContainerLogFileInfo logMeta : containerLogMeta .getContainerLogMeta()) { out.printf(PER_LOG_FILE_INFO_PATTERN, logMeta.getFileName(), logMeta.getFileSize(), logMeta.getLastModifiedTime(), "AGGREGATED"); @@ -345,20 +343,6 @@ private static void logDirNoAccessPermission(String remoteAppLogDir, + ". Error message found: " + errorMessage); } - @Private - public PrintStream createPrintStream(String localDir, String nodeId, - String containerId) throws IOException { - PrintStream out = System.out; - if(localDir != null && !localDir.isEmpty()) { - Path nodePath = new Path(localDir, LogAggregationUtils - .getNodeString(nodeId)); - Files.createDirectories(Paths.get(nodePath.toString())); - Path containerLogPath = new Path(nodePath, containerId); - out = new PrintStream(containerLogPath.toString(), "UTF-8"); - } - return out; - } - public void closePrintStream(PrintStream out) { if (out != System.out) { IOUtils.closeQuietly(out); @@ -379,7 +363,7 @@ public Set listContainerLogs(ContainerLogsRequest options) return logTypes; } for (ContainerLogMeta logMeta: containersLogMeta) { - for (PerContainerLogFileInfo fileInfo : logMeta.getContainerLogMeta()) { + for (ContainerLogFileInfo fileInfo : logMeta.getContainerLogMeta()) { logTypes.add(fileInfo.getFileName()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogToolUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogToolUtils.java index ddee445bb1d..90faa19e261 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogToolUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogToolUtils.java @@ -21,11 +21,15 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.PrintStream; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Paths; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; /** * This class contains several utility function which could be used in different @@ -158,4 +162,26 @@ public static void 
outputContainerLogThroughZeroCopy(String containerId, } } + + /** + * Create the container log file under given (local directory/nodeId) and + * return the PrintStream object. + * @param localDir the Local Dir + * @param nodeId the NodeId + * @param containerId the ContainerId + * @return the printStream object + * @throws IOException if an I/O error occurs + */ + public static PrintStream createPrintStream(String localDir, String nodeId, + String containerId) throws IOException { + PrintStream out = System.out; + if(localDir != null && !localDir.isEmpty()) { + Path nodePath = new Path(localDir, LogAggregationUtils + .getNodeString(nodeId)); + Files.createDirectories(Paths.get(nodePath.toString())); + Path containerLogPath = new Path(nodePath, containerId); + out = new PrintStream(containerLogPath.toString(), "UTF-8"); + } + return out; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java index 39f3dc339f5..5df900b9f64 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/LogAggregationFileController.java @@ -25,9 +25,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; -import java.io.PrintStream; -import java.nio.file.Files; -import java.nio.file.Paths; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; @@ -37,6 +34,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; @@ -91,6 +89,12 @@ public abstract class LogAggregationFileController { protected static final FsPermission APP_DIR_PERMISSIONS = FsPermission .createImmutable((short) 0770); + /** + * Umask for the log file. + */ + protected static final FsPermission APP_LOG_FILE_UMASK = FsPermission + .createImmutable((short) (0640 ^ 0777)); + // This is temporary solution. The configuration will be deleted once // we find a more scalable method to only write a single log file per LRS. private static final String NM_LOG_AGGREGATION_NUM_LOG_FILES_SIZE_PER_APP @@ -98,6 +102,11 @@ public abstract class LogAggregationFileController { private static final int DEFAULT_NM_LOG_AGGREGATION_NUM_LOG_FILES_SIZE_PER_APP = 30; + // This is temporary solution. The configuration will be deleted once we have + // the FileSystem API to check whether append operation is supported or not. 
+ public static final String LOG_AGGREGATION_FS_SUPPORT_APPEND + = YarnConfiguration.YARN_PREFIX+ "log-aggregation.fs-support-append"; + protected Configuration conf; protected Path remoteRootLogDir; protected String remoteRootLogDirSuffix; @@ -178,19 +187,6 @@ public abstract void write(LogKey logKey, LogValue logValue) public abstract void postWrite(LogAggregationFileControllerContext record) throws Exception; - protected PrintStream createPrintStream(String localDir, String nodeId, - String containerId) throws IOException { - PrintStream out = System.out; - if(localDir != null && !localDir.isEmpty()) { - Path nodePath = new Path(localDir, LogAggregationUtils - .getNodeString(nodeId)); - Files.createDirectories(Paths.get(nodePath.toString())); - Path containerLogPath = new Path(nodePath, containerId); - out = new PrintStream(containerLogPath.toString(), "UTF-8"); - } - return out; - } - protected void closePrintStream(OutputStream out) { if (out != System.out) { IOUtils.cleanupWithLogger(LOG, out); @@ -481,4 +477,21 @@ public Object run() throws Exception { LOG.error("Failed to clean old logs", e); } } + + /** + * Create the aggregated log suffix. The LogAggregationFileController + * should call this to get the suffix and append the suffix to the end + * of each log. This would keep the aggregated log format consistent. + * + * @param fileName the File Name + * @return the aggregated log suffix String + */ + protected String aggregatedLogSuffix(String fileName) { + StringBuilder sb = new StringBuilder(); + String endOfFile = "End of LogType:" + fileName; + sb.append("\n" + endOfFile + "\n"); + sb.append(StringUtils.repeat("*", endOfFile.length() + 50) + + "\n\n"); + return sb.toString(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/IndexedFileAggregatedLogsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/IndexedFileAggregatedLogsBlock.java new file mode 100644 index 00000000000..c4cbfda1327 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/IndexedFileAggregatedLogsBlock.java @@ -0,0 +1,275 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.logaggregation.filecontroller.ifile; + +import static org.apache.hadoop.yarn.webapp.YarnWebParams.APP_OWNER; +import static org.apache.hadoop.yarn.webapp.YarnWebParams.CONTAINER_ID; +import static org.apache.hadoop.yarn.webapp.YarnWebParams.CONTAINER_LOG_TYPE; +import static org.apache.hadoop.yarn.webapp.YarnWebParams.ENTITY_STRING; +import static org.apache.hadoop.yarn.webapp.YarnWebParams.NM_NODENAME; + +import com.google.inject.Inject; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.file.tfile.BoundedRangeFileInputStream; +import org.apache.hadoop.io.file.tfile.Compression; +import org.apache.hadoop.io.file.tfile.Compression.Algorithm; +import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils; +import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationHtmlBlock; +import org.apache.hadoop.yarn.logaggregation.filecontroller.ifile.LogAggregationIndexedFileController.IndexedFileLogMeta; +import org.apache.hadoop.yarn.logaggregation.filecontroller.ifile.LogAggregationIndexedFileController.IndexedLogsMeta; +import org.apache.hadoop.yarn.logaggregation.filecontroller.ifile.LogAggregationIndexedFileController.IndexedPerAggregationLogMeta; +import org.apache.hadoop.yarn.util.Times; +import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; +import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.PRE; + +/** + * The Aggregated Logs Block implementation for Indexed File. 
+ */ +@InterfaceAudience.LimitedPrivate({"YARN", "MapReduce"}) +public class IndexedFileAggregatedLogsBlock extends LogAggregationHtmlBlock { + + private final LogAggregationIndexedFileController fileController; + private final Configuration conf; + + @Inject + public IndexedFileAggregatedLogsBlock(ViewContext ctx, + Configuration conf, + LogAggregationIndexedFileController fileController) { + super(ctx); + this.conf = conf; + this.fileController = fileController; + } + + @Override + protected void render(Block html) { + BlockParameters params = verifyAndParseParameters(html); + if (params == null) { + return; + } + + ApplicationId appId = params.getAppId(); + ContainerId containerId = params.getContainerId(); + NodeId nodeId = params.getNodeId(); + String appOwner = params.getAppOwner(); + String logEntity = params.getLogEntity(); + long start = params.getStartIndex(); + long end = params.getEndIndex(); + + List nodeFiles = null; + try { + nodeFiles = LogAggregationUtils + .getRemoteNodeFileList(conf, appId, appOwner, + this.fileController.getRemoteRootLogDir(), + this.fileController.getRemoteRootLogDirSuffix()); + } catch(Exception ex) { + html.h1("Unable to locate any logs for container " + + containerId.toString()); + LOG.error(ex.getMessage()); + return; + } + + Map checkSumFiles; + try { + checkSumFiles = fileController.filterFiles(nodeFiles, + LogAggregationIndexedFileController.CHECK_SUM_FILE_SUFFIX); + } catch (IOException ex) { + LOG.error("Error getting logs for " + logEntity, ex); + html.h1("Error getting logs for " + logEntity); + return; + } + + List fileToRead; + try { + fileToRead = fileController.getNodeLogFileToRead(nodeFiles, + nodeId.toString(), appId); + } catch (IOException ex) { + LOG.error("Error getting logs for " + logEntity, ex); + html.h1("Error getting logs for " + logEntity); + return; + } + + boolean foundLog = false; + String desiredLogType = $(CONTAINER_LOG_TYPE); + try { + for (FileStatus thisNodeFile : fileToRead) { + FileStatus checkSum = fileController.getAllChecksumFiles( + checkSumFiles, thisNodeFile.getPath().getName()); + long endIndex = -1; + if (checkSum != null) { + endIndex = fileController.loadIndexedLogsCheckSum( + checkSum.getPath()); + } + IndexedLogsMeta indexedLogsMeta = null; + try { + indexedLogsMeta = fileController.loadIndexedLogsMeta( + thisNodeFile.getPath(), endIndex); + } catch (Exception ex) { + // DO NOTHING + LOG.warn("Can not load log meta from the log file:" + + thisNodeFile.getPath()); + continue; + } + if (indexedLogsMeta == null) { + continue; + } + Map appAcls = indexedLogsMeta.getAcls(); + String user = indexedLogsMeta.getUser(); + String remoteUser = request().getRemoteUser(); + if (!checkAcls(conf, appId, user, appAcls, remoteUser)) { + html.h1().__("User [" + remoteUser + + "] is not authorized to view the logs for " + logEntity + + " in log file [" + thisNodeFile.getPath().getName() + "]") + .__(); + LOG.error("User [" + remoteUser + + "] is not authorized to view the logs for " + logEntity); + continue; + } + String compressAlgo = indexedLogsMeta.getCompressName(); + List candidates = new ArrayList<>(); + for (IndexedPerAggregationLogMeta logMeta + : indexedLogsMeta.getLogMetas()) { + for (Entry> meta + : logMeta.getLogMetas().entrySet()) { + for (IndexedFileLogMeta log : meta.getValue()) { + if (!log.getContainerId().equals(containerId.toString())) { + continue; + } + if (desiredLogType != null && !desiredLogType.isEmpty() + && !desiredLogType.equals(log.getFileName())) { + continue; + } + candidates.add(log); + } + 
} + } + if (candidates.isEmpty()) { + continue; + } + + Algorithm compressName = Compression.getCompressionAlgorithmByName( + compressAlgo); + Decompressor decompressor = compressName.getDecompressor(); + FileContext fileContext = FileContext.getFileContext( + thisNodeFile.getPath().toUri(), conf); + FSDataInputStream fsin = fileContext.open(thisNodeFile.getPath()); + int bufferSize = 65536; + for (IndexedFileLogMeta candidate : candidates) { + byte[] cbuf = new byte[bufferSize]; + InputStream in = null; + try { + in = compressName.createDecompressionStream( + new BoundedRangeFileInputStream(fsin, + candidate.getStartIndex(), + candidate.getFileCompressedSize()), + decompressor, + LogAggregationIndexedFileController.getFSInputBufferSize( + conf)); + long logLength = candidate.getFileSize(); + html.pre().__("\n\n").__(); + html.p().__("Log Type: " + candidate.getFileName()).__(); + html.p().__("Log Upload Time: " + Times.format( + candidate.getLastModificatedTime())).__(); + html.p().__("Log Length: " + Long.toString( + logLength)).__(); + long startIndex = start < 0 + ? logLength + start : start; + startIndex = startIndex < 0 ? 0 : startIndex; + startIndex = startIndex > logLength ? logLength : startIndex; + long endLogIndex = end < 0 + ? logLength + end : end; + endLogIndex = endLogIndex < 0 ? 0 : endLogIndex; + endLogIndex = endLogIndex > logLength ? logLength : endLogIndex; + endLogIndex = endLogIndex < startIndex ? + startIndex : endLogIndex; + long toRead = endLogIndex - startIndex; + if (toRead < logLength) { + html.p().__("Showing " + toRead + " bytes of " + logLength + + " total. Click ").a(url("logs", $(NM_NODENAME), + $(CONTAINER_ID), $(ENTITY_STRING), $(APP_OWNER), + candidate.getFileName(), "?start=0"), "here"). + __(" for the full log.").__(); + } + long totalSkipped = 0; + while (totalSkipped < start) { + long ret = in.skip(start - totalSkipped); + if (ret == 0) { + //Read one byte + int nextByte = in.read(); + // Check if we have reached EOF + if (nextByte == -1) { + throw new IOException("Premature EOF from container log"); + } + ret = 1; + } + totalSkipped += ret; + } + int len = 0; + int currentToRead = toRead > bufferSize ? bufferSize : (int) toRead; + PRE pre = html.pre(); + + while (toRead > 0 + && (len = in.read(cbuf, 0, currentToRead)) > 0) { + pre.__(new String(cbuf, 0, len, Charset.forName("UTF-8"))); + toRead = toRead - len; + currentToRead = toRead > bufferSize ? 
bufferSize : (int) toRead; + } + + pre.__(); + foundLog = true; + } catch (Exception ex) { + LOG.error("Error getting logs for " + logEntity, ex); + continue; + } finally { + IOUtils.closeQuietly(in); + } + } + } + if (!foundLog) { + if (desiredLogType.isEmpty()) { + html.h1("No logs available for container " + containerId.toString()); + } else { + html.h1("Unable to locate '" + desiredLogType + + "' log for container " + containerId.toString()); + } + } + } catch (RuntimeException e) { + throw e; + } catch (Exception ex) { + html.h1().__("Error getting logs for " + logEntity).__(); + LOG.error("Error getting logs for " + logEntity, ex); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/LogAggregationIndexedFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/LogAggregationIndexedFileController.java new file mode 100644 index 00000000000..243945e2c18 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/LogAggregationIndexedFileController.java @@ -0,0 +1,1057 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.logaggregation.filecontroller.ifile; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.UUID; +import org.apache.commons.lang.SerializationUtils; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.HarFs; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.SecureIOUtils; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.file.tfile.BoundedRangeFileInputStream; +import org.apache.hadoop.io.file.tfile.Compression; +import org.apache.hadoop.io.file.tfile.SimpleBufferedOutputStream; +import org.apache.hadoop.io.file.tfile.Compression.Algorithm; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType; +import org.apache.hadoop.yarn.logaggregation.ContainerLogMeta; +import org.apache.hadoop.yarn.logaggregation.ContainerLogsRequest; +import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils; +import org.apache.hadoop.yarn.logaggregation.LogToolUtils; +import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey; +import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue; +import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileController; +import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileControllerContext; +import org.apache.hadoop.yarn.util.Times; +import org.apache.hadoop.yarn.webapp.View.ViewContext; +import org.apache.hadoop.yarn.webapp.view.HtmlBlock.Block; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The Indexed Log Aggregation File Format implementation. 
+ * + */ +@Private +@Unstable +public class LogAggregationIndexedFileController + extends LogAggregationFileController { + + private static final Logger LOG = LoggerFactory.getLogger( + LogAggregationIndexedFileController.class); + private static final String FS_OUTPUT_BUF_SIZE_ATTR = + "indexedFile.fs.output.buffer.size"; + private static final String FS_INPUT_BUF_SIZE_ATTR = + "indexedFile.fs.input.buffer.size"; + private static final String FS_NUM_RETRIES_ATTR = + "indexedFile.fs.op.num-retries"; + private static final String FS_RETRY_INTERVAL_MS_ATTR = + "indexedFile.fs.retry-interval-ms"; + private static final int UUID_LENGTH = 36; + + @VisibleForTesting + public static final String CHECK_SUM_FILE_SUFFIX = "-checksum"; + + private int fsNumRetries = 3; + private long fsRetryInterval = 1000L; + private static final int VERSION = 1; + private IndexedLogsMeta indexedLogsMeta = null; + private IndexedPerAggregationLogMeta logsMetaInThisCycle; + private long logAggregationTimeInThisCycle; + private FSDataOutputStream fsDataOStream; + private Algorithm compressAlgo; + private CachedIndexedLogsMeta cachedIndexedLogsMeta = null; + private boolean logAggregationSuccessfullyInThisCyCle = false; + private long currentOffSet = 0; + private Path remoteLogCheckSumFile; + private FileContext fc; + private UserGroupInformation ugi; + private String uuid = null; + + public LogAggregationIndexedFileController() {} + + @Override + public void initInternal(Configuration conf) { + // Currently, we need the underlying File System to support append + // operation. Will remove this check after we finish + // LogAggregationIndexedFileController for non-append mode. + boolean append = conf.getBoolean(LOG_AGGREGATION_FS_SUPPORT_APPEND, true); + if (!append) { + throw new YarnRuntimeException("The configuration:" + + LOG_AGGREGATION_FS_SUPPORT_APPEND + " is set as False. 
We can only" + + " use LogAggregationIndexedFileController when the FileSystem " + + "support append operations."); + } + String remoteDirStr = String.format( + YarnConfiguration.LOG_AGGREGATION_REMOTE_APP_LOG_DIR_FMT, + this.fileControllerName); + String remoteDir = conf.get(remoteDirStr); + if (remoteDir == null || remoteDir.isEmpty()) { + remoteDir = conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, + YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR); + } + this.remoteRootLogDir = new Path(remoteDir); + String suffix = String.format( + YarnConfiguration.LOG_AGGREGATION_REMOTE_APP_LOG_DIR_SUFFIX_FMT, + this.fileControllerName); + this.remoteRootLogDirSuffix = conf.get(suffix); + if (this.remoteRootLogDirSuffix == null + || this.remoteRootLogDirSuffix.isEmpty()) { + this.remoteRootLogDirSuffix = conf.get( + YarnConfiguration.NM_REMOTE_APP_LOG_DIR_SUFFIX, + YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR_SUFFIX) + + "-ifile"; + } + String compressName = conf.get( + YarnConfiguration.NM_LOG_AGG_COMPRESSION_TYPE, + YarnConfiguration.DEFAULT_NM_LOG_AGG_COMPRESSION_TYPE); + this.compressAlgo = Compression.getCompressionAlgorithmByName( + compressName); + this.fsNumRetries = conf.getInt(FS_NUM_RETRIES_ATTR, 3); + this.fsRetryInterval = conf.getLong(FS_RETRY_INTERVAL_MS_ATTR, 1000L); + } + + @Override + public void initializeWriter( + final LogAggregationFileControllerContext context) + throws IOException { + final UserGroupInformation userUgi = context.getUserUgi(); + final Map appAcls = context.getAppAcls(); + final String nodeId = context.getNodeId().toString(); + final Path remoteLogFile = context.getRemoteNodeLogFileForApp(); + this.ugi = userUgi; + logAggregationSuccessfullyInThisCyCle = false; + logsMetaInThisCycle = new IndexedPerAggregationLogMeta(); + logAggregationTimeInThisCycle = System.currentTimeMillis(); + logsMetaInThisCycle.setUploadTimeStamp(logAggregationTimeInThisCycle); + logsMetaInThisCycle.setRemoteNodeFile(remoteLogFile.getName()); + try { + userUgi.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + fc = FileContext.getFileContext( + remoteRootLogDir.toUri(), conf); + fc.setUMask(APP_LOG_FILE_UMASK); + boolean fileExist = fc.util().exists(remoteLogFile); + if (fileExist && context.isLogAggregationInRolling()) { + fsDataOStream = fc.create(remoteLogFile, + EnumSet.of(CreateFlag.APPEND), + new Options.CreateOpts[] {}); + if (uuid == null) { + FSDataInputStream fsDataInputStream = null; + try { + fsDataInputStream = fc.open(remoteLogFile); + byte[] b = new byte[UUID_LENGTH]; + int actual = fsDataInputStream.read(b); + if (actual != UUID_LENGTH) { + // Get an error when parse the UUID from existed log file. + // Simply OverWrite the existed log file and re-create the + // UUID. + fsDataOStream = fc.create(remoteLogFile, + EnumSet.of(CreateFlag.OVERWRITE), + new Options.CreateOpts[] {}); + uuid = UUID.randomUUID().toString(); + fsDataOStream.write(uuid.getBytes(Charset.forName("UTF-8"))); + fsDataOStream.flush(); + } else { + uuid = new String(b, Charset.forName("UTF-8")); + } + } finally { + IOUtils.cleanupWithLogger(LOG, fsDataInputStream); + } + } + // if the remote log file exists, but we do not have any + // indexedLogsMeta. We need to re-load indexedLogsMeta from + // the existing remote log file. If the re-load fails, we simply + // re-create a new indexedLogsMeta object. And will re-load + // the indexedLogsMeta from checksum file later. 
+ if (indexedLogsMeta == null) { + try { + indexedLogsMeta = loadIndexedLogsMeta(remoteLogFile); + } catch (IOException ex) { + // DO NOTHING + } + } + } else { + fsDataOStream = fc.create(remoteLogFile, + EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), + new Options.CreateOpts[] {}); + if (uuid == null) { + uuid = UUID.randomUUID().toString(); + } + byte[] b = uuid.getBytes(Charset.forName("UTF-8")); + fsDataOStream.write(b); + fsDataOStream.flush(); + } + if (indexedLogsMeta == null) { + indexedLogsMeta = new IndexedLogsMeta(); + indexedLogsMeta.setVersion(VERSION); + indexedLogsMeta.setUser(userUgi.getShortUserName()); + indexedLogsMeta.setAcls(appAcls); + indexedLogsMeta.setNodeId(nodeId); + String compressName = conf.get( + YarnConfiguration.NM_LOG_AGG_COMPRESSION_TYPE, + YarnConfiguration.DEFAULT_NM_LOG_AGG_COMPRESSION_TYPE); + indexedLogsMeta.setCompressName(compressName); + } + final long currentAggregatedLogFileLength = fc + .getFileStatus(remoteLogFile).getLen(); + // only check the check-sum file when we are in append mode + if (context.isLogAggregationInRolling()) { + // check whether the checksum file exists to figure out + // whether the previous log aggregation process is successful + // and the aggregated log file is corrupted or not. + remoteLogCheckSumFile = new Path(remoteLogFile.getParent(), + (remoteLogFile.getName() + CHECK_SUM_FILE_SUFFIX)); + boolean exist = fc.util().exists(remoteLogCheckSumFile); + if (!exist) { + FSDataOutputStream checksumFileOutputStream = null; + try { + checksumFileOutputStream = fc.create(remoteLogCheckSumFile, + EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), + new Options.CreateOpts[] {}); + checksumFileOutputStream.writeLong( + currentAggregatedLogFileLength); + } finally { + IOUtils.cleanupWithLogger(LOG, checksumFileOutputStream); + } + } else { + FSDataInputStream checksumFileInputStream = null; + try { + checksumFileInputStream = fc.open(remoteLogCheckSumFile); + long endIndex = checksumFileInputStream.readLong(); + IndexedLogsMeta recoveredLogsMeta = loadIndexedLogsMeta( + remoteLogFile, endIndex); + if (recoveredLogsMeta == null) { + indexedLogsMeta.getLogMetas().clear(); + } else { + indexedLogsMeta = recoveredLogsMeta; + } + } finally { + IOUtils.cleanupWithLogger(LOG, checksumFileInputStream); + } + } + } + // append a simple character("\n") to move the writer cursor, so + // we could get the correct position when we call + // fsOutputStream.getStartPos() + final byte[] dummyBytes = "\n".getBytes(Charset.forName("UTF-8")); + fsDataOStream.write(dummyBytes); + fsDataOStream.flush(); + + if (fsDataOStream.getPos() >= (currentAggregatedLogFileLength + + dummyBytes.length)) { + currentOffSet = 0; + } else { + currentOffSet = currentAggregatedLogFileLength; + } + return null; + } + }); + } catch (Exception e) { + throw new IOException(e); + } + } + + @Override + public void closeWriter() { + IOUtils.cleanupWithLogger(LOG, this.fsDataOStream); + } + + @Override + public void write(LogKey logKey, LogValue logValue) throws IOException { + String containerId = logKey.toString(); + Set pendingUploadFiles = logValue + .getPendingLogFilesToUploadForThisContainer(); + List metas = new ArrayList<>(); + for (File logFile : pendingUploadFiles) { + FileInputStream in = null; + try { + in = SecureIOUtils.openForRead(logFile, logValue.getUser(), null); + } catch (IOException e) { + logErrorMessage(logFile, e); + IOUtils.cleanupWithLogger(LOG, in); + continue; + } + final long fileLength = logFile.length(); + IndexedFileOutputStreamState 
outputStreamState = null; + try { + outputStreamState = new IndexedFileOutputStreamState( + this.compressAlgo, this.fsDataOStream, conf, this.currentOffSet); + byte[] buf = new byte[65535]; + int len = 0; + long bytesLeft = fileLength; + while ((len = in.read(buf)) != -1) { + //If buffer contents within fileLength, write + if (len < bytesLeft) { + outputStreamState.getOutputStream().write(buf, 0, len); + bytesLeft-=len; + } else { + //else only write contents within fileLength, then exit early + outputStreamState.getOutputStream().write(buf, 0, + (int)bytesLeft); + break; + } + } + long newLength = logFile.length(); + if(fileLength < newLength) { + LOG.warn("Aggregated logs truncated by approximately "+ + (newLength-fileLength) +" bytes."); + } + logAggregationSuccessfullyInThisCyCle = true; + } catch (IOException e) { + String message = logErrorMessage(logFile, e); + if (outputStreamState != null && + outputStreamState.getOutputStream() != null) { + outputStreamState.getOutputStream().write( + message.getBytes(Charset.forName("UTF-8"))); + } + } finally { + IOUtils.cleanupWithLogger(LOG, in); + } + + IndexedFileLogMeta meta = new IndexedFileLogMeta(); + meta.setContainerId(containerId.toString()); + meta.setFileName(logFile.getName()); + if (outputStreamState != null) { + outputStreamState.finish(); + meta.setFileCompressedSize(outputStreamState.getCompressedSize()); + meta.setStartIndex(outputStreamState.getStartPos()); + meta.setFileSize(fileLength); + } + meta.setLastModificatedTime(logFile.lastModified()); + metas.add(meta); + } + logsMetaInThisCycle.addContainerLogMeta(containerId, metas); + } + + @Override + public void postWrite(LogAggregationFileControllerContext record) + throws Exception { + // always aggregate the previous logsMeta, and append them together + // at the end of the file + indexedLogsMeta.addLogMeta(logsMetaInThisCycle); + byte[] b = SerializationUtils.serialize(indexedLogsMeta); + this.fsDataOStream.write(b); + int length = b.length; + this.fsDataOStream.writeInt(length); + byte[] separator = this.uuid.getBytes(Charset.forName("UTF-8")); + this.fsDataOStream.write(separator); + if (logAggregationSuccessfullyInThisCyCle && + record.isLogAggregationInRolling()) { + deleteFileWithRetries(fc, ugi, remoteLogCheckSumFile); + } + } + + private void deleteFileWithRetries(final FileContext fileContext, + final UserGroupInformation userUgi, + final Path deletePath) throws Exception { + new FSAction() { + @Override + public Void run() throws Exception { + deleteFileWithPrivilege(fileContext, userUgi, deletePath); + return null; + } + }.runWithRetries(); + } + + private Object deleteFileWithPrivilege(final FileContext fileContext, + final UserGroupInformation userUgi, final Path fileToDelete) + throws Exception { + return userUgi.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + if (fileContext.util().exists(fileToDelete)) { + fileContext.delete(fileToDelete, false); + } + return null; + } + }); + } + + @Override + public boolean readAggregatedLogs(ContainerLogsRequest logRequest, + OutputStream os) throws IOException { + boolean findLogs = false; + boolean createPrintStream = (os == null); + ApplicationId appId = logRequest.getAppId(); + String nodeId = logRequest.getNodeId(); + String nodeIdStr = (nodeId == null || nodeId.isEmpty()) ? 
null + : LogAggregationUtils.getNodeString(nodeId); + List logTypes = new ArrayList<>(); + if (logRequest.getLogTypes() != null && !logRequest + .getLogTypes().isEmpty()) { + logTypes.addAll(logRequest.getLogTypes()); + } + String containerIdStr = logRequest.getContainerId(); + boolean getAllContainers = (containerIdStr == null + || containerIdStr.isEmpty()); + long size = logRequest.getBytes(); + List nodeFiles = LogAggregationUtils + .getRemoteNodeFileList(conf, appId, logRequest.getAppOwner(), + this.remoteRootLogDir, this.remoteRootLogDirSuffix); + if (nodeFiles.isEmpty()) { + throw new IOException("There is no available log fils for " + + "application:" + appId); + } + Map checkSumFiles = filterFiles( + nodeFiles, CHECK_SUM_FILE_SUFFIX); + List fileToRead = getNodeLogFileToRead( + nodeFiles, nodeIdStr, appId); + byte[] buf = new byte[65535]; + for (FileStatus thisNodeFile : fileToRead) { + String nodeName = thisNodeFile.getPath().getName(); + FileStatus checkSum = getAllChecksumFiles(checkSumFiles, + thisNodeFile.getPath().getName()); + long endIndex = -1; + if (checkSum != null) { + endIndex = loadIndexedLogsCheckSum(checkSum.getPath()); + } + IndexedLogsMeta indexedLogsMeta = null; + try { + indexedLogsMeta = loadIndexedLogsMeta(thisNodeFile.getPath(), + endIndex); + } catch (Exception ex) { + // DO NOTHING + LOG.warn("Can not load log meta from the log file:" + + thisNodeFile.getPath()); + continue; + } + if (indexedLogsMeta == null) { + continue; + } + String compressAlgo = indexedLogsMeta.getCompressName(); + List candidates = new ArrayList<>(); + for (IndexedPerAggregationLogMeta logMeta + : indexedLogsMeta.getLogMetas()) { + for (Entry> meta + : logMeta.getLogMetas().entrySet()) { + for (IndexedFileLogMeta log : meta.getValue()) { + if (!getAllContainers && !log.getContainerId() + .equals(containerIdStr)) { + continue; + } + if (logTypes != null && !logTypes.isEmpty() && + !logTypes.contains(log.getFileName())) { + continue; + } + candidates.add(log); + } + } + } + if (candidates.isEmpty()) { + continue; + } + + Algorithm compressName = Compression.getCompressionAlgorithmByName( + compressAlgo); + Decompressor decompressor = compressName.getDecompressor(); + FileContext fileContext = FileContext.getFileContext( + thisNodeFile.getPath().toUri(), conf); + FSDataInputStream fsin = fileContext.open(thisNodeFile.getPath()); + String currentContainer = ""; + for (IndexedFileLogMeta candidate : candidates) { + if (!candidate.getContainerId().equals(currentContainer)) { + if (createPrintStream) { + closePrintStream(os); + os = LogToolUtils.createPrintStream( + logRequest.getOutputLocalDir(), + thisNodeFile.getPath().getName(), + candidate.getContainerId()); + currentContainer = candidate.getContainerId(); + } + } + InputStream in = null; + try { + in = compressName.createDecompressionStream( + new BoundedRangeFileInputStream(fsin, + candidate.getStartIndex(), + candidate.getFileCompressedSize()), + decompressor, getFSInputBufferSize(conf)); + LogToolUtils.outputContainerLog(candidate.getContainerId(), + nodeName, candidate.getFileName(), candidate.getFileSize(), size, + Times.format(candidate.getLastModificatedTime()), + in, os, buf, ContainerLogAggregationType.AGGREGATED); + byte[] b = aggregatedLogSuffix(candidate.getFileName()) + .getBytes(Charset.forName("UTF-8")); + os.write(b, 0, b.length); + findLogs = true; + } catch (IOException e) { + System.err.println(e.getMessage()); + compressName.returnDecompressor(decompressor); + continue; + } finally { + os.flush(); + 
IOUtils.cleanupWithLogger(LOG, in); + } + } + } + return findLogs; + } + + // TODO: fix me if the remote file system does not support append operation. + @Override + public List readAggregatedLogsMeta( + ContainerLogsRequest logRequest) throws IOException { + List listOfLogsMeta = new ArrayList<>(); + List containersLogMeta = new ArrayList<>(); + String containerIdStr = logRequest.getContainerId(); + String nodeId = logRequest.getNodeId(); + ApplicationId appId = logRequest.getAppId(); + String appOwner = logRequest.getAppOwner(); + boolean getAllContainers = (containerIdStr == null || + containerIdStr.isEmpty()); + String nodeIdStr = (nodeId == null || nodeId.isEmpty()) ? null + : LogAggregationUtils.getNodeString(nodeId); + List nodeFiles = LogAggregationUtils + .getRemoteNodeFileList(conf, appId, appOwner, this.remoteRootLogDir, + this.remoteRootLogDirSuffix); + if (nodeFiles.isEmpty()) { + throw new IOException("There is no available log fils for " + + "application:" + appId); + } + Map checkSumFiles = filterFiles( + nodeFiles, CHECK_SUM_FILE_SUFFIX); + List fileToRead = getNodeLogFileToRead( + nodeFiles, nodeIdStr, appId); + for(FileStatus thisNodeFile : fileToRead) { + try { + FileStatus checkSum = getAllChecksumFiles(checkSumFiles, + thisNodeFile.getPath().getName()); + long endIndex = -1; + if (checkSum != null) { + endIndex = loadIndexedLogsCheckSum(checkSum.getPath()); + } + IndexedLogsMeta current = loadIndexedLogsMeta( + thisNodeFile.getPath(), endIndex); + if (current != null) { + listOfLogsMeta.add(current); + } + } catch (IOException ex) { + // DO NOTHING + LOG.warn("Can not get log meta from the log file:" + + thisNodeFile.getPath()); + } + } + for (IndexedLogsMeta indexedLogMeta : listOfLogsMeta) { + String curNodeId = indexedLogMeta.getNodeId(); + for (IndexedPerAggregationLogMeta logMeta : + indexedLogMeta.getLogMetas()) { + if (getAllContainers) { + for (Entry> log : logMeta + .getLogMetas().entrySet()) { + ContainerLogMeta meta = new ContainerLogMeta( + log.getKey().toString(), curNodeId); + for (IndexedFileLogMeta aMeta : log.getValue()) { + meta.addLogMeta(aMeta.getFileName(), Long.toString( + aMeta.getFileSize()), + Times.format(aMeta.getLastModificatedTime())); + } + containersLogMeta.add(meta); + } + } else if (logMeta.getContainerLogMeta(containerIdStr) != null) { + ContainerLogMeta meta = new ContainerLogMeta(containerIdStr, + curNodeId); + for (IndexedFileLogMeta log : + logMeta.getContainerLogMeta(containerIdStr)) { + meta.addLogMeta(log.getFileName(), Long.toString( + log.getFileSize()), + Times.format(log.getLastModificatedTime())); + } + containersLogMeta.add(meta); + } + } + } + Collections.sort(containersLogMeta, new Comparator() { + @Override + public int compare(ContainerLogMeta o1, ContainerLogMeta o2) { + return o1.getContainerId().compareTo(o2.getContainerId()); + } + }); + return containersLogMeta; + } + + @Private + public Map filterFiles( + List fileList, final String suffix) throws IOException { + Map checkSumFiles = new HashMap<>(); + Set status = new HashSet(fileList); + Iterable mask = + Iterables.filter(status, new Predicate() { + @Override + public boolean apply(FileStatus next) { + return next.getPath().getName().endsWith( + suffix); + } + }); + status = Sets.newHashSet(mask); + for (FileStatus file : status) { + checkSumFiles.put(file.getPath().getName(), file); + } + return checkSumFiles; + } + + @Private + public List getNodeLogFileToRead( + List nodeFiles, String nodeId, ApplicationId appId) + throws IOException { + List listOfFiles = 
new ArrayList<>(); + List files = new ArrayList<>(nodeFiles); + for (FileStatus file : files) { + String nodeName = file.getPath().getName(); + if ((nodeId == null || nodeId.isEmpty() + || nodeName.contains(LogAggregationUtils + .getNodeString(nodeId))) && !nodeName.endsWith( + LogAggregationUtils.TMP_FILE_SUFFIX) && + !nodeName.endsWith(CHECK_SUM_FILE_SUFFIX)) { + if (nodeName.equals(appId + ".har")) { + Path p = new Path("har:///" + file.getPath().toUri().getRawPath()); + files = Arrays.asList(HarFs.get(p.toUri(), conf).listStatus(p)); + continue; + } + listOfFiles.add(file); + } + } + return listOfFiles; + } + + @Private + public FileStatus getAllChecksumFiles(Map fileMap, + String fileName) { + for (Entry file : fileMap.entrySet()) { + if (file.getKey().startsWith(fileName) && file.getKey() + .endsWith(CHECK_SUM_FILE_SUFFIX)) { + return file.getValue(); + } + } + return null; + } + + @Override + public void renderAggregatedLogsBlock(Block html, ViewContext context) { + IndexedFileAggregatedLogsBlock block = new IndexedFileAggregatedLogsBlock( + context, this.conf, this); + block.render(html); + } + + @Override + public String getApplicationOwner(Path aggregatedLogPath) + throws IOException { + if (this.cachedIndexedLogsMeta == null + || !this.cachedIndexedLogsMeta.getRemoteLogPath() + .equals(aggregatedLogPath)) { + this.cachedIndexedLogsMeta = new CachedIndexedLogsMeta( + loadIndexedLogsMeta(aggregatedLogPath), aggregatedLogPath); + } + return this.cachedIndexedLogsMeta.getCachedIndexedLogsMeta().getUser(); + } + + @Override + public Map getApplicationAcls( + Path aggregatedLogPath) throws IOException { + if (this.cachedIndexedLogsMeta == null + || !this.cachedIndexedLogsMeta.getRemoteLogPath() + .equals(aggregatedLogPath)) { + this.cachedIndexedLogsMeta = new CachedIndexedLogsMeta( + loadIndexedLogsMeta(aggregatedLogPath), aggregatedLogPath); + } + return this.cachedIndexedLogsMeta.getCachedIndexedLogsMeta().getAcls(); + } + + @Override + public Path getRemoteAppLogDir(ApplicationId appId, String user) + throws IOException { + return LogAggregationUtils.getRemoteAppLogDir(conf, appId, user, + this.remoteRootLogDir, this.remoteRootLogDirSuffix); + } + + @Private + public IndexedLogsMeta loadIndexedLogsMeta(Path remoteLogPath, long end) + throws IOException { + FileContext fileContext = + FileContext.getFileContext(remoteLogPath.toUri(), conf); + FSDataInputStream fsDataIStream = null; + try { + fsDataIStream = fileContext.open(remoteLogPath); + if (end == 0) { + return null; + } + long fileLength = end < 0 ? 
fileContext.getFileStatus( + remoteLogPath).getLen() : end; + fsDataIStream.seek(fileLength - Integer.SIZE/ Byte.SIZE - UUID_LENGTH); + int offset = fsDataIStream.readInt(); + byte[] array = new byte[offset]; + fsDataIStream.seek( + fileLength - offset - Integer.SIZE/ Byte.SIZE - UUID_LENGTH); + int actual = fsDataIStream.read(array); + if (actual != offset) { + throw new IOException("Error on loading log meta from " + + remoteLogPath); + } + return (IndexedLogsMeta)SerializationUtils + .deserialize(array); + } finally { + IOUtils.cleanupWithLogger(LOG, fsDataIStream); + } + } + + private IndexedLogsMeta loadIndexedLogsMeta(Path remoteLogPath) + throws IOException { + return loadIndexedLogsMeta(remoteLogPath, -1); + } + + @Private + public long loadIndexedLogsCheckSum(Path remoteLogCheckSumPath) + throws IOException { + FileContext fileContext = + FileContext.getFileContext(remoteLogCheckSumPath.toUri(), conf); + FSDataInputStream fsDataIStream = null; + try { + fsDataIStream = fileContext.open(remoteLogCheckSumPath); + return fsDataIStream.readLong(); + } finally { + IOUtils.cleanupWithLogger(LOG, fsDataIStream); + } + } + + /** + * This IndexedLogsMeta includes all the meta information + * for the aggregated log file. + */ + @Private + @VisibleForTesting + public static class IndexedLogsMeta implements Serializable { + + private static final long serialVersionUID = 5439875373L; + private int version; + private String user; + private String compressName; + private Map acls; + private String nodeId; + private List logMetas = new ArrayList<>(); + + public int getVersion() { + return this.version; + } + + public void setVersion(int version) { + this.version = version; + } + + public String getUser() { + return this.user; + } + + public void setUser(String user) { + this.user = user; + } + + public Map getAcls() { + return this.acls; + } + + public void setAcls(Map acls) { + this.acls = acls; + } + + public String getCompressName() { + return compressName; + } + + public void setCompressName(String compressName) { + this.compressName = compressName; + } + + public String getNodeId() { + return nodeId; + } + + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + + public void addLogMeta(IndexedPerAggregationLogMeta logMeta) { + logMetas.add(logMeta); + } + + public List getLogMetas() { + return logMetas; + } + } + + /** + * This IndexedPerAggregationLogMeta includes the meta information + * for all files which would be aggregated in one + * Log aggregation cycle. 
+ */ + public static class IndexedPerAggregationLogMeta implements Serializable { + private static final long serialVersionUID = 3929298383L; + private String remoteNodeLogFileName; + private Map> logMetas = new HashMap<>(); + private long uploadTimeStamp; + + public String getRemoteNodeFile() { + return remoteNodeLogFileName; + } + public void setRemoteNodeFile(String remoteNodeLogFileName) { + this.remoteNodeLogFileName = remoteNodeLogFileName; + } + + public void addContainerLogMeta(String containerId, + List logMeta) { + logMetas.put(containerId, logMeta); + } + + public List getContainerLogMeta(String containerId) { + return logMetas.get(containerId); + } + + public Map> getLogMetas() { + return logMetas; + } + + public long getUploadTimeStamp() { + return uploadTimeStamp; + } + + public void setUploadTimeStamp(long uploadTimeStamp) { + this.uploadTimeStamp = uploadTimeStamp; + } + } + + /** + * This IndexedFileLogMeta includes the meta information + * for a single file which would be aggregated in one + * Log aggregation cycle. + * + */ + @Private + @VisibleForTesting + public static class IndexedFileLogMeta implements Serializable { + private static final long serialVersionUID = 1L; + private String containerId; + private String fileName; + private long fileSize; + private long fileCompressedSize; + private long lastModificatedTime; + private long startIndex; + + public String getFileName() { + return fileName; + } + public void setFileName(String fileName) { + this.fileName = fileName; + } + + public long getFileSize() { + return fileSize; + } + public void setFileSize(long fileSize) { + this.fileSize = fileSize; + } + + public long getFileCompressedSize() { + return fileCompressedSize; + } + public void setFileCompressedSize(long fileCompressedSize) { + this.fileCompressedSize = fileCompressedSize; + } + + public long getLastModificatedTime() { + return lastModificatedTime; + } + public void setLastModificatedTime(long lastModificatedTime) { + this.lastModificatedTime = lastModificatedTime; + } + + public long getStartIndex() { + return startIndex; + } + public void setStartIndex(long startIndex) { + this.startIndex = startIndex; + } + + public String getContainerId() { + return containerId; + } + public void setContainerId(String containerId) { + this.containerId = containerId; + } + } + + private static String logErrorMessage(File logFile, Exception e) { + String message = "Error aggregating log file. Log file : " + + logFile.getAbsolutePath() + ". 
" + e.getMessage(); + LOG.error(message, e); + return message; + } + + private static class IndexedFileOutputStreamState { + private final Algorithm compressAlgo; + private Compressor compressor; + private final FSDataOutputStream fsOut; + private long posStart; + private final SimpleBufferedOutputStream fsBufferedOutput; + private OutputStream out; + private long offset; + + IndexedFileOutputStreamState(Algorithm compressionName, + FSDataOutputStream fsOut, Configuration conf, long offset) + throws IOException { + this.compressAlgo = compressionName; + this.fsOut = fsOut; + this.offset = offset; + this.posStart = fsOut.getPos(); + + BytesWritable fsOutputBuffer = new BytesWritable(); + fsOutputBuffer.setCapacity(LogAggregationIndexedFileController + .getFSOutputBufferSize(conf)); + + this.fsBufferedOutput = new SimpleBufferedOutputStream(this.fsOut, + fsOutputBuffer.getBytes()); + + this.compressor = compressAlgo.getCompressor(); + + try { + this.out = compressAlgo.createCompressionStream( + fsBufferedOutput, compressor, 0); + } catch (IOException e) { + compressAlgo.returnCompressor(compressor); + throw e; + } + } + + OutputStream getOutputStream() { + return out; + } + + long getCurrentPos() throws IOException { + return fsOut.getPos() + fsBufferedOutput.size(); + } + + long getStartPos() { + return posStart + offset; + } + + long getCompressedSize() throws IOException { + long ret = getCurrentPos() - posStart; + return ret; + } + + void finish() throws IOException { + try { + if (out != null) { + out.flush(); + out = null; + } + } finally { + compressAlgo.returnCompressor(compressor); + compressor = null; + } + } + } + + private static class CachedIndexedLogsMeta { + private final Path remoteLogPath; + private final IndexedLogsMeta indexedLogsMeta; + CachedIndexedLogsMeta(IndexedLogsMeta indexedLogsMeta, + Path remoteLogPath) { + this.indexedLogsMeta = indexedLogsMeta; + this.remoteLogPath = remoteLogPath; + } + + public Path getRemoteLogPath() { + return this.remoteLogPath; + } + + public IndexedLogsMeta getCachedIndexedLogsMeta() { + return this.indexedLogsMeta; + } + } + + @Private + public static int getFSOutputBufferSize(Configuration conf) { + return conf.getInt(FS_OUTPUT_BUF_SIZE_ATTR, 256 * 1024); + } + + @Private + public static int getFSInputBufferSize(Configuration conf) { + return conf.getInt(FS_INPUT_BUF_SIZE_ATTR, 256 * 1024); + } + + private abstract class FSAction { + abstract T run() throws Exception; + + T runWithRetries() throws Exception { + int retry = 0; + while (true) { + try { + return run(); + } catch (IOException e) { + LOG.info("Exception while executing an FS operation.", e); + if (++retry > fsNumRetries) { + LOG.info("Maxed out FS retries. Giving up!"); + throw e; + } + LOG.info("Retrying operation on FS. Retry no. " + retry); + Thread.sleep(fsRetryInterval); + } + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/package-info.java new file mode 100644 index 00000000000..08ddecef5db --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +@InterfaceAudience.Public +package org.apache.hadoop.yarn.logaggregation.filecontroller.ifile; +import org.apache.hadoop.classification.InterfaceAudience; + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java index aaed538fc01..989b3266c60 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java @@ -27,7 +27,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.math3.util.Pair; @@ -192,7 +191,7 @@ public boolean readAggregatedLogs(ContainerLogsRequest logRequest, while (valueStream != null) { if (getAllContainers || (key.toString().equals(containerIdStr))) { if (createPrintStream) { - os = createPrintStream( + os = LogToolUtils.createPrintStream( logRequest.getOutputLocalDir(), thisNodeFile.getPath().getName(), key.toString()); } @@ -209,12 +208,7 @@ public boolean readAggregatedLogs(ContainerLogsRequest logRequest, Times.format(thisNodeFile.getModificationTime()), valueStream, os, buf, ContainerLogAggregationType.AGGREGATED); - StringBuilder sb = new StringBuilder(); - String endOfFile = "End of LogType:" + fileType; - sb.append("\n" + endOfFile + "\n"); - sb.append(StringUtils.repeat("*", endOfFile.length() + 50) - + "\n\n"); - byte[] b = sb.toString().getBytes( + byte[] b = aggregatedLogSuffix(fileType).getBytes( Charset.forName("UTF-8")); os.write(b, 0, b.length); findLogs = true; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/MultiStateTransitionListener.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/MultiStateTransitionListener.java new file mode 100644 index 00000000000..1a28fc50002 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/MultiStateTransitionListener.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.state; + +import java.util.ArrayList; +import java.util.List; + +/** + * A {@link StateTransitionListener} that dispatches the pre and post + * state transitions to multiple registered listeners. + * NOTE: The registered listeners are called in a for loop. Clients should + * know that a listener configured earlier might prevent a later listener + * from being called, if for instance it throws an un-caught Exception. + */ +public abstract class MultiStateTransitionListener + <OPERAND, EVENT, STATE extends Enum<STATE>> implements + StateTransitionListener<OPERAND, EVENT, STATE> { + + private final List<StateTransitionListener<OPERAND, EVENT, STATE>> listeners = + new ArrayList<>(); + + /** + * Add a listener to the list of listeners. + * @param listener A listener. + */ + public void addListener(StateTransitionListener<OPERAND, EVENT, STATE> + listener) { + listeners.add(listener); + } + + @Override + public void preTransition(OPERAND op, STATE beforeState, + EVENT eventToBeProcessed) { + for (StateTransitionListener<OPERAND, EVENT, STATE> listener : listeners) { + listener.preTransition(op, beforeState, eventToBeProcessed); + } + } + + @Override + public void postTransition(OPERAND op, STATE beforeState, STATE afterState, + EVENT processedEvent) { + for (StateTransitionListener<OPERAND, EVENT, STATE> listener : listeners) { + listener.postTransition(op, beforeState, afterState, processedEvent); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java index 5b76ce8fb52..4bb005c0536 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java @@ -391,6 +391,21 @@ public STATE doTransition(OPERAND operand, STATE oldState, } } + /** + * A StateMachine that accepts a transition listener. + * @param operand the object upon which the returned + * {@link StateMachine} will operate. + * @param initialState the state in which the returned + * {@link StateMachine} will start. + * @param listener An implementation of a {@link StateTransitionListener}. + * @return A {@link StateMachine}.
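As an editorial sketch of how this listener-aware factory method and the MultiStateTransitionListener added above might be wired together (Job, JobEvent, JobEventType, JobState, job and LOG are hypothetical names introduced only for this illustration, not part of this patch):

    // Hypothetical operand/event/state types standing in for a real client.
    MultiStateTransitionListener<Job, JobEvent, JobState> listeners =
        new MultiStateTransitionListener<Job, JobEvent, JobState>() { };
    listeners.addListener(new StateTransitionListener<Job, JobEvent, JobState>() {
      @Override
      public void preTransition(Job op, JobState beforeState, JobEvent event) {
        LOG.info("Processing " + event + " while in " + beforeState);
      }
      @Override
      public void postTransition(Job op, JobState beforeState,
          JobState afterState, JobEvent event) {
        LOG.info("Transitioned from " + beforeState + " to " + afterState);
      }
    });
    // stateMachineFactory is an existing StateMachineFactory for these types;
    // the new overload wraps every doTransition() call with the listener hooks.
    StateMachine<JobState, JobEventType, JobEvent> stateMachine =
        stateMachineFactory.make(job, JobState.NEW, listeners);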
+ */ + public StateMachine + make(OPERAND operand, STATE initialState, + StateTransitionListener listener) { + return new InternalStateMachine(operand, initialState, listener); + } + /* * @return a {@link StateMachine} that starts in * {@code initialState} and whose {@link Transition} s are @@ -424,14 +439,36 @@ public StateMachine make(OPERAND operand) { return new InternalStateMachine(operand, defaultInitialState); } + private static class NoopStateTransitionListener + implements StateTransitionListener { + @Override + public void preTransition(Object op, Enum beforeState, + Object eventToBeProcessed) { } + + @Override + public void postTransition(Object op, Enum beforeState, Enum afterState, + Object processedEvent) { } + } + + private static final NoopStateTransitionListener NOOP_LISTENER = + new NoopStateTransitionListener(); + private class InternalStateMachine implements StateMachine { private final OPERAND operand; private STATE currentState; + private final StateTransitionListener listener; InternalStateMachine(OPERAND operand, STATE initialState) { + this(operand, initialState, null); + } + + InternalStateMachine(OPERAND operand, STATE initialState, + StateTransitionListener transitionListener) { this.operand = operand; this.currentState = initialState; + this.listener = + (transitionListener == null) ? NOOP_LISTENER : transitionListener; if (!optimized) { maybeMakeStateMachineTable(); } @@ -445,8 +482,11 @@ public synchronized STATE getCurrentState() { @Override public synchronized STATE doTransition(EVENTTYPE eventType, EVENT event) throws InvalidStateTransitionException { + listener.preTransition(operand, currentState, event); + STATE oldState = currentState; currentState = StateMachineFactory.this.doTransition (operand, currentState, eventType, event); + listener.postTransition(operand, oldState, currentState, event); return currentState; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateTransitionListener.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateTransitionListener.java new file mode 100644 index 00000000000..657c19398a1 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateTransitionListener.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.state; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * A State Transition Listener. + * It exposes a pre and post transition hook called before and + * after the transition. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface StateTransitionListener + > { + + /** + * Pre Transition Hook. This will be called before transition. + * @param op Operand. + * @param beforeState State before transition. + * @param eventToBeProcessed Incoming Event. + */ + void preTransition(OPERAND op, STATE beforeState, EVENT eventToBeProcessed); + + /** + * Post Transition Hook. This will be called after the transition. + * @param op Operand. + * @param beforeState State before transition. + * @param afterState State after transition. + * @param processedEvent Processed Event. + */ + void postTransition(OPERAND op, STATE beforeState, STATE afterState, + EVENT processedEvent); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/StringHelper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/StringHelper.java index 0240fbcd59f..331be308e2e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/StringHelper.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/StringHelper.java @@ -20,9 +20,15 @@ import com.google.common.base.Joiner; import com.google.common.base.Splitter; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.regex.Pattern; import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; /** * Common string manipulation helpers @@ -174,4 +180,34 @@ private static void uappend(StringBuilder sb, String part) { } sb.append(part); } + + public static String getResourceSecondsString(Map targetMap) { + List strings = new ArrayList<>(targetMap.size()); + //completed app report in the timeline server doesn't have usage report + Long memorySeconds = 0L; + Long vcoreSeconds = 0L; + if (targetMap.containsKey(ResourceInformation.MEMORY_MB.getName())) { + memorySeconds = targetMap.get(ResourceInformation.MEMORY_MB.getName()); + } + if (targetMap.containsKey(ResourceInformation.VCORES.getName())) { + vcoreSeconds = targetMap.get(ResourceInformation.VCORES.getName()); + } + strings.add(memorySeconds + " MB-seconds"); + strings.add(vcoreSeconds + " vcore-seconds"); + Map tmp = ResourceUtils.getResourceTypes(); + if (targetMap.size() > 2) { + for (Map.Entry entry : targetMap.entrySet()) { + if (!entry.getKey().equals(ResourceInformation.MEMORY_MB.getName()) + && !entry.getKey().equals(ResourceInformation.VCORES.getName())) { + String units = ""; + if (tmp.containsKey(entry.getKey())) { + units = tmp.get(entry.getKey()).getUnits(); + } + strings.add(entry.getValue() + " " + entry.getKey() + "-" + units + + "seconds"); + } + } + } + return String.join(", ", strings); + } } diff 
--git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java index bdf60bd9a5b..7f155e7a40e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java @@ -123,8 +123,7 @@ public Resource multiplyAndNormalizeDown(Resource r, double by, } @Override - public boolean fitsIn(Resource cluster, - Resource smaller, Resource bigger) { + public boolean fitsIn(Resource smaller, Resource bigger) { return smaller.getMemorySize() <= bigger.getMemorySize(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java index 7697e1dfc33..ca828a5251b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java @@ -22,136 +22,395 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Arrays; /** - * A {@link ResourceCalculator} which uses the concept of + * A {@link ResourceCalculator} which uses the concept of * dominant resource to compare multi-dimensional resources. * - * Essentially the idea is that the in a multi-resource environment, - * the resource allocation should be determined by the dominant share - * of an entity (user or queue), which is the maximum share that the - * entity has been allocated of any resource. - * - * In a nutshell, it seeks to maximize the minimum dominant share across - * all entities. - * + * Essentially the idea is that the in a multi-resource environment, + * the resource allocation should be determined by the dominant share + * of an entity (user or queue), which is the maximum share that the + * entity has been allocated of any resource. + * + * In a nutshell, it seeks to maximize the minimum dominant share across + * all entities. + * * For example, if user A runs CPU-heavy tasks and user B runs - * memory-heavy tasks, it attempts to equalize CPU share of user A - * with Memory-share of user B. - * + * memory-heavy tasks, it attempts to equalize CPU share of user A + * with Memory-share of user B. + * * In the single resource case, it reduces to max-min fairness for that resource. 
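A small worked illustration of the dominant-share idea (numbers invented for this note, in the spirit of the DRF paper's example): in a cluster of 9 CPUs and 18 GB of memory, a demand of <1 CPU, 4 GB> yields shares of 1/9 ≈ 0.11 for CPU and 4/18 ≈ 0.22 for memory, so memory is that entity's dominant resource, while a demand of <3 CPU, 1 GB> yields 3/9 ≈ 0.33 and 1/18 ≈ 0.06, so CPU dominates. The calculator then orders the two entities by comparing the dominant shares 0.22 and 0.33.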
- * + * * See the Dominant Resource Fairness paper for more details: * www.cs.berkeley.edu/~matei/papers/2011/nsdi_drf.pdf */ @Private @Unstable public class DominantResourceCalculator extends ResourceCalculator { - private static final Log LOG = - LogFactory.getLog(DominantResourceCalculator.class); + static final Log LOG = LogFactory.getLog(DominantResourceCalculator.class); + + public DominantResourceCalculator() { + } + + /** + * Compare two resources - if the value for every resource type for the lhs + * is greater than that of the rhs, return 1. If the value for every resource + * type in the lhs is less than the rhs, return -1. Otherwise, return 0 + * + * @param lhs resource to be compared + * @param rhs resource to be compared + * @return 0, 1, or -1 + */ + private int compare(Resource lhs, Resource rhs) { + boolean lhsGreater = false; + boolean rhsGreater = false; + int ret = 0; + + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation lhsResourceInformation = lhs + .getResourceInformation(i); + ResourceInformation rhsResourceInformation = rhs + .getResourceInformation(i); + int diff = lhsResourceInformation.compareTo(rhsResourceInformation); + if (diff >= 1) { + lhsGreater = true; + } else if (diff <= -1) { + rhsGreater = true; + } + } + if (lhsGreater && rhsGreater) { + ret = 0; + } else if (lhsGreater) { + ret = 1; + } else if (rhsGreater) { + ret = -1; + } + return ret; + } @Override public int compare(Resource clusterResource, Resource lhs, Resource rhs, boolean singleType) { - if (lhs.equals(rhs)) { return 0; } - + if (isInvalidDivisor(clusterResource)) { - if ((lhs.getMemorySize() < rhs.getMemorySize() && - lhs.getVirtualCores() > rhs.getVirtualCores()) || - (lhs.getMemorySize() > rhs.getMemorySize() && - lhs.getVirtualCores() < rhs.getVirtualCores())) { - return 0; - } else if (lhs.getMemorySize() > rhs.getMemorySize() - || lhs.getVirtualCores() > rhs.getVirtualCores()) { - return 1; - } else if (lhs.getMemorySize() < rhs.getMemorySize() - || lhs.getVirtualCores() < rhs.getVirtualCores()) { - return -1; - } + return this.compare(lhs, rhs); } - float l = getResourceAsValue(clusterResource, lhs, true); - float r = getResourceAsValue(clusterResource, rhs, true); - - if (l < r) { - return -1; - } else if (l > r) { - return 1; - } else if (!singleType) { - l = getResourceAsValue(clusterResource, lhs, false); - r = getResourceAsValue(clusterResource, rhs, false); - if (l < r) { - return -1; - } else if (l > r) { - return 1; + // We have to calculate the shares for all resource types for both + // resources and then look for which resource has the biggest + // share overall. + ResourceInformation[] clusterRes = clusterResource.getResources(); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + + // If array creation shows up as a time sink, these arrays could be cached + // because they're always the same length. 
+ double[] lhsShares = new double[maxLength]; + double[] rhsShares = new double[maxLength]; + double diff; + + try { + if (singleType) { + double[] max = new double[2]; + + calculateShares(clusterRes, lhs, rhs, lhsShares, rhsShares, max); + + diff = max[0] - max[1]; + } else if (maxLength == 2) { + // Special case to handle the common scenario of only CPU and memory + // so that we can optimize for performance + diff = calculateSharesForTwoMandatoryResources(clusterRes, lhs, rhs, + lhsShares, rhsShares); + } else { + calculateShares(clusterRes, lhs, rhs, lhsShares, rhsShares); + + Arrays.sort(lhsShares); + Arrays.sort(rhsShares); + + diff = compareShares(lhsShares, rhsShares); } + } catch (ArrayIndexOutOfBoundsException ex) { + StringWriter out = new StringWriter(); // No need to close a StringWriter + ex.printStackTrace(new PrintWriter(out)); + + LOG.error("A problem was encountered while calculating resource " + + "availability that should not occur under normal circumstances. " + + "Please report this error to the Hadoop community by opening a " + + "JIRA ticket at http://issues.apache.org/jira and including the " + + "following information:\n* Exception encountered: " + out + "* " + + "Cluster resources: " + Arrays.toString(clusterRes) + "\n* " + + "LHS resource: " + Arrays.toString(lhs.getResources()) + "\n* " + + "RHS resource: " + Arrays.toString(rhs.getResources())); + LOG.error("The resource manager is in an inconsistent state. It is safe " + + "for the resource manager to be restarted as the error encountered " + + "should be transitive. If high availability is enabled, failing " + + "over to a standby resource manager is also safe."); + throw new YarnRuntimeException("A problem was encountered while " + + "calculating resource availability that should not occur under " + + "normal circumstances. Please see the log for more information.", + ex); } - - return 0; + + return (int) Math.signum(diff); } /** - * Use 'dominant' for now since we only have 2 resources - gives us a slight - * performance boost. - * - * Once we add more resources, we'll need a more complicated (and slightly - * less performant algorithm). + * Calculate the shares for {@code first} and {@code second} according to + * {@code clusterRes}, and store the results in {@code firstShares} and + * {@code secondShares}, respectively. All parameters must be non-null. + * @param clusterRes the array of ResourceInformation instances that + * represents the cluster's maximum resources + * @param first the first resource to compare + * @param second the second resource to compare + * @param firstShares an array to store the shares for the first resource + * @param secondShares an array to store the shares for the second resource + * @return -1.0, 0.0, or 1.0, depending on whether the max share of the first + * resource is less than, equal to, or greater than the max share of the + * second resource, respectively + * @throws NullPointerException if any parameter is null */ - protected float getResourceAsValue( - Resource clusterResource, Resource resource, boolean dominant) { - // Just use 'dominant' resource - return (dominant) ? 
- Math.max( - (float)resource.getMemorySize() / clusterResource.getMemorySize(), - (float)resource.getVirtualCores() / clusterResource.getVirtualCores() - ) - : - Math.min( - (float)resource.getMemorySize() / clusterResource.getMemorySize(), - (float)resource.getVirtualCores() / clusterResource.getVirtualCores() - ); + private void calculateShares(ResourceInformation[] clusterRes, Resource first, + Resource second, double[] firstShares, double[] secondShares) { + ResourceInformation[] firstRes = first.getResources(); + ResourceInformation[] secondRes = second.getResources(); + + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + firstShares[i] = calculateShare(clusterRes[i], firstRes[i]); + secondShares[i] = calculateShare(clusterRes[i], secondRes[i]); + } } - - @Override - public long computeAvailableContainers(Resource available, Resource required) { - return Math.min( - available.getMemorySize() / required.getMemorySize(), - available.getVirtualCores() / required.getVirtualCores()); + + /** + * Calculate the shares for {@code first} and {@code second} according to + * {@code clusterRes}, and store the results in {@code firstShares} and + * {@code secondShares}, respectively. All parameters must be non-null. + * This method assumes that the length of {@code clusterRes} is exactly 2 and + * makes performance optimizations based on that assumption. + * @param clusterRes the array of ResourceInformation instances that + * represents the cluster's maximum resources + * @param first the first resource to compare + * @param second the second resource to compare + * @param firstShares an array to store the shares for the first resource + * @param secondShares an array to store the shares for the second resource + * @return -1.0, 0.0, or 1.0, depending on whether the max share of the first + * resource is less than, equal to, or greater than the max share of the + * second resource, respectively + * @throws NullPointerException if any parameter is null + */ + private int calculateSharesForTwoMandatoryResources( + ResourceInformation[] clusterRes, Resource first, Resource second, + double[] firstShares, double[] secondShares) { + ResourceInformation[] firstRes = first.getResources(); + ResourceInformation[] secondRes = second.getResources(); + firstShares[0] = calculateShare(clusterRes[0], firstRes[0]); + secondShares[0] = calculateShare(clusterRes[0], secondRes[0]); + firstShares[1] = calculateShare(clusterRes[1], firstRes[1]); + secondShares[1] = calculateShare(clusterRes[1], secondRes[1]); + + int firstDom = 0; + int firstSub = 1; + if (firstShares[1] > firstShares[0]) { + firstDom = 1; + firstSub = 0; + } + int secondDom = 0; + int secondSub = 1; + if (secondShares[1] > secondShares[0]) { + secondDom = 1; + secondSub = 0; + } + + if (firstShares[firstDom] > secondShares[secondDom]) { + return 1; + } else if (firstShares[firstDom] < secondShares[secondDom]) { + return -1; + } else if (firstShares[firstSub] > secondShares[secondSub]) { + return 1; + } else if (firstShares[firstSub] < secondShares[secondSub]) { + return -1; + } else { + return 0; + } + } + + /** + * Calculate the shares for {@code first} and {@code second} according to + * {@code clusterRes}, and store the results in {@code firstShares} and + * {@code secondShares}, respectively. {@code max} will be populated with + * the max shares from {@code firstShare} and {@code secondShare} in the + * first and second indices, respectively. 
All parameters must be non-null, + * and {@code max} must have a length of at least 2. + * @param clusterRes the array of ResourceInformation instances that + * represents the cluster's maximum resources + * @param first the first resource to compare + * @param second the second resource to compare + * @param firstShares an array to store the shares for the first resource + * @param secondShares an array to store the shares for the second resource + * @param max an array to store the max shares of the first and second + * resources + * @return -1.0, 0.0, or 1.0, depending on whether the max share of the first + * resource is less than, equal to, or greater than the max share of the + * second resource, respectively + * @throws NullPointerException if any parameter is null + * @throws ArrayIndexOutOfBoundsException if the length of {@code max} is + * less than 2 + */ + private void calculateShares(ResourceInformation[] clusterRes, Resource first, + Resource second, double[] firstShares, double[] secondShares, + double[] max) { + ResourceInformation[] firstRes = first.getResources(); + ResourceInformation[] secondRes = second.getResources(); + + max[0] = 0.0; + max[1] = 0.0; + + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + firstShares[i] = calculateShare(clusterRes[i], firstRes[i]); + secondShares[i] = calculateShare(clusterRes[i], secondRes[i]); + + if (firstShares[i] > max[0]) { + max[0] = firstShares[i]; + } + + if (secondShares[i] > max[1]) { + max[1] = secondShares[i]; + } + } + } + + /** + * Calculate the share for a resource type. + * @param clusterRes the resource type for the cluster maximum + * @param res the resource type for which to calculate the share + * @return the share + */ + private double calculateShare(ResourceInformation clusterRes, + ResourceInformation res) { + // Convert the resources' units into the cluster resource's units + long value = UnitsConversionUtil.convert(res.getUnits(), + clusterRes.getUnits(), res.getValue()); + + return (double) value / clusterRes.getValue(); + } + + /** + * Compare the two shares arrays by comparing the largest elements, then the + * next largest if the previous were equal, etc. The share arrays must be + * sorted in ascending order. + * @param lhsShares the first share array to compare + * @param rhsShares the second share array to compare + * @return a number that is less than 0 if the first array is less than the + * second, equal to 0 if the arrays are equal, and greater than 0 if the + * first array is greater than the second + */ + private double compareShares(double[] lhsShares, double[] rhsShares) { + double diff = 0.0; + + // lhsShares and rhsShares must necessarily have the same length, because + // everyone uses the same master resource list. 
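    // Editorial walk-through, not part of this patch (share values invented):
    // with sorted arrays lhsShares = [0.10, 0.25, 0.50] and
    // rhsShares = [0.20, 0.25, 0.50], the loop below first compares index 2
    // (0.50 vs 0.50) and index 1 (0.25 vs 0.25), which are equal, then stops
    // at index 0 where 0.10 - 0.20 = -0.10, so a negative diff is returned
    // and lhs is ranked below rhs.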
+ for (int i = lhsShares.length - 1; i >= 0; i--) { + diff = lhsShares[i] - rhsShares[i]; + + if (diff != 0.0) { + break; + } + } + + return diff; } @Override - public float divide(Resource clusterResource, - Resource numerator, Resource denominator) { - return - getResourceAsValue(clusterResource, numerator, true) / - getResourceAsValue(clusterResource, denominator, true); + public long computeAvailableContainers(Resource available, + Resource required) { + long min = Long.MAX_VALUE; + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation availableResource = available + .getResourceInformation(i); + ResourceInformation requiredResource = required.getResourceInformation(i); + long requiredResourceValue = UnitsConversionUtil.convert( + requiredResource.getUnits(), availableResource.getUnits(), + requiredResource.getValue()); + if (requiredResourceValue != 0) { + long tmp = availableResource.getValue() / requiredResourceValue; + min = min < tmp ? min : tmp; + } + } + return min > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) min; } - + + @Override + public float divide(Resource clusterResource, + Resource numerator, Resource denominator) { + int nKnownResourceTypes = ResourceUtils.getNumberOfKnownResourceTypes(); + ResourceInformation[] clusterRes = clusterResource.getResources(); + // We have to provide the calculateShares() method with somewhere to store + // the shares. We don't actually need these shares afterwards. + double[] numeratorShares = new double[nKnownResourceTypes]; + double[] denominatorShares = new double[nKnownResourceTypes]; + // We also have to provide a place for calculateShares() to store the max + // shares so that we can use them. + double[] max = new double[2]; + + calculateShares(clusterRes, numerator, denominator, numeratorShares, + denominatorShares, max); + + return (float) (max[0] / max[1]); + } + @Override public boolean isInvalidDivisor(Resource r) { - if (r.getMemorySize() == 0.0f || r.getVirtualCores() == 0.0f) { - return true; + for (ResourceInformation res : r.getResources()) { + if (res.getValue() == 0L) { + return true; + } } return false; } @Override public float ratio(Resource a, Resource b) { - return Math.max( - (float)a.getMemorySize()/b.getMemorySize(), - (float)a.getVirtualCores()/b.getVirtualCores() - ); + float ratio = 0.0f; + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation aResourceInformation = a.getResourceInformation(i); + ResourceInformation bResourceInformation = b.getResourceInformation(i); + long bResourceValue = UnitsConversionUtil.convert( + bResourceInformation.getUnits(), aResourceInformation.getUnits(), + bResourceInformation.getValue()); + float tmp = (float) aResourceInformation.getValue() + / (float) bResourceValue; + ratio = ratio > tmp ? 
ratio : tmp; + } + return ratio; } @Override public Resource divideAndCeil(Resource numerator, int denominator) { - return Resources.createResource( - divideAndCeil(numerator.getMemorySize(), denominator), - divideAndCeil(numerator.getVirtualCores(), denominator) - ); + return divideAndCeil(numerator, (long) denominator); + } + + public Resource divideAndCeil(Resource numerator, long denominator) { + Resource ret = Resource.newInstance(numerator); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation resourceInformation = ret.getResourceInformation(i); + resourceInformation + .setValue(divideAndCeil(resourceInformation.getValue(), denominator)); + } + return ret; } @Override @@ -164,80 +423,136 @@ public Resource divideAndCeil(Resource numerator, float denominator) { @Override public Resource normalize(Resource r, Resource minimumResource, - Resource maximumResource, Resource stepFactor) { - if (stepFactor.getMemorySize() == 0 || stepFactor.getVirtualCores() == 0) { - Resource step = Resources.clone(stepFactor); - if (stepFactor.getMemorySize() == 0) { - LOG.error("Memory cannot be allocated in increments of zero. Assuming " - + minimumResource.getMemorySize() + "MB increment size. " - + "Please ensure the scheduler configuration is correct."); - step.setMemorySize(minimumResource.getMemorySize()); - } + Resource maximumResource, Resource stepFactor) { + Resource ret = Resource.newInstance(r); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation rResourceInformation = r.getResourceInformation(i); + ResourceInformation minimumResourceInformation = minimumResource + .getResourceInformation(i); + ResourceInformation maximumResourceInformation = maximumResource + .getResourceInformation(i); + ResourceInformation stepFactorResourceInformation = stepFactor + .getResourceInformation(i); + ResourceInformation tmp = ret.getResourceInformation(i); - if (stepFactor.getVirtualCores() == 0) { - LOG.error("VCore cannot be allocated in increments of zero. Assuming " - + minimumResource.getVirtualCores() + "VCores increment size. 
" - + "Please ensure the scheduler configuration is correct."); - step.setVirtualCores(minimumResource.getVirtualCores()); + long rValue = rResourceInformation.getValue(); + long minimumValue = UnitsConversionUtil.convert( + minimumResourceInformation.getUnits(), + rResourceInformation.getUnits(), + minimumResourceInformation.getValue()); + long maximumValue = UnitsConversionUtil.convert( + maximumResourceInformation.getUnits(), + rResourceInformation.getUnits(), + maximumResourceInformation.getValue()); + long stepFactorValue = UnitsConversionUtil.convert( + stepFactorResourceInformation.getUnits(), + rResourceInformation.getUnits(), + stepFactorResourceInformation.getValue()); + long value = Math.max(rValue, minimumValue); + if (stepFactorValue != 0) { + value = roundUp(value, stepFactorValue); } - - stepFactor = step; + tmp.setValue(Math.min(value, maximumValue)); + ret.setResourceInformation(i, tmp); } - - long normalizedMemory = Math.min( - roundUp( - Math.max(r.getMemorySize(), minimumResource.getMemorySize()), - stepFactor.getMemorySize()), - maximumResource.getMemorySize()); - int normalizedCores = Math.min( - roundUp( - Math.max(r.getVirtualCores(), minimumResource.getVirtualCores()), - stepFactor.getVirtualCores()), - maximumResource.getVirtualCores()); - return Resources.createResource(normalizedMemory, - normalizedCores); + return ret; } @Override public Resource roundUp(Resource r, Resource stepFactor) { - return Resources.createResource( - roundUp(r.getMemorySize(), stepFactor.getMemorySize()), - roundUp(r.getVirtualCores(), stepFactor.getVirtualCores()) - ); + return this.rounding(r, stepFactor, true); } @Override public Resource roundDown(Resource r, Resource stepFactor) { - return Resources.createResource( - roundDown(r.getMemorySize(), stepFactor.getMemorySize()), - roundDown(r.getVirtualCores(), stepFactor.getVirtualCores()) - ); + return this.rounding(r, stepFactor, false); + } + + private Resource rounding(Resource r, Resource stepFactor, boolean roundUp) { + Resource ret = Resource.newInstance(r); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation rResourceInformation = r.getResourceInformation(i); + ResourceInformation stepFactorResourceInformation = stepFactor + .getResourceInformation(i); + + long rValue = rResourceInformation.getValue(); + long stepFactorValue = UnitsConversionUtil.convert( + stepFactorResourceInformation.getUnits(), + rResourceInformation.getUnits(), + stepFactorResourceInformation.getValue()); + long value = rValue; + if (stepFactorValue != 0) { + value = roundUp + ? 
roundUp(rValue, stepFactorValue) + : roundDown(rValue, stepFactorValue); + } + ResourceInformation.copy(rResourceInformation, + ret.getResourceInformation(i)); + ret.getResourceInformation(i).setValue(value); + } + return ret; } @Override public Resource multiplyAndNormalizeUp(Resource r, double by, Resource stepFactor) { - return Resources.createResource( - roundUp((long) Math.ceil((float) (r.getMemorySize() * by)), - stepFactor.getMemorySize()), - roundUp((int) Math.ceil((float) (r.getVirtualCores() * by)), - stepFactor.getVirtualCores())); + return this.multiplyAndNormalize(r, by, stepFactor, true); } @Override public Resource multiplyAndNormalizeDown(Resource r, double by, Resource stepFactor) { - return Resources.createResource( - roundDown((long) (r.getMemorySize() * by), stepFactor.getMemorySize()), - roundDown((int) (r.getVirtualCores() * by), - stepFactor.getVirtualCores())); + return this.multiplyAndNormalize(r, by, stepFactor, false); + } + + private Resource multiplyAndNormalize(Resource r, double by, + Resource stepFactor, boolean roundUp) { + Resource ret = Resource.newInstance(r); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation rResourceInformation = r.getResourceInformation(i); + ResourceInformation stepFactorResourceInformation = stepFactor + .getResourceInformation(i); + ResourceInformation tmp = ret.getResourceInformation(i); + + long rValue = rResourceInformation.getValue(); + long stepFactorValue = UnitsConversionUtil.convert( + stepFactorResourceInformation.getUnits(), + rResourceInformation.getUnits(), + stepFactorResourceInformation.getValue()); + long value; + if (stepFactorValue != 0) { + value = roundUp + ? roundUp((long) Math.ceil((float) (rValue * by)), stepFactorValue) + : roundDown((long) (rValue * by), stepFactorValue); + } else { + value = roundUp + ? (long) Math.ceil((float) (rValue * by)) + : (long) (rValue * by); + } + tmp.setValue(value); + } + return ret; } @Override - public boolean fitsIn(Resource cluster, - Resource smaller, Resource bigger) { - return smaller.getMemorySize() <= bigger.getMemorySize() - && smaller.getVirtualCores() <= bigger.getVirtualCores(); + public boolean fitsIn(Resource smaller, Resource bigger) { + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + ResourceInformation sResourceInformation = smaller + .getResourceInformation(i); + ResourceInformation bResourceInformation = bigger + .getResourceInformation(i); + long sResourceValue = UnitsConversionUtil.convert( + sResourceInformation.getUnits(), bResourceInformation.getUnits(), + sResourceInformation.getValue()); + if (sResourceValue > bResourceInformation.getValue()) { + return false; + } + } + return true; } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java index 398dac50fa5..d59560fa24d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java @@ -225,8 +225,7 @@ public abstract float divide( /** * Check if a smaller resource can be contained by bigger resource. 
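A hedged usage sketch of the simplified two-argument signature (the resource values below are invented for this note; the Resource, Resources and DominantResourceCalculator calls mirror ones that appear elsewhere in this patch):

    ResourceCalculator rc = new DominantResourceCalculator();
    Resource ask = Resource.newInstance(2048, 2);      // 2048 MB, 2 vcores
    Resource headroom = Resource.newInstance(4096, 4); // 4096 MB, 4 vcores
    boolean fits = rc.fitsIn(ask, headroom);           // true: every known type fits
    boolean same = Resources.fitsIn(ask, headroom);    // static helper, same check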
*/ - public abstract boolean fitsIn(Resource cluster, - Resource smaller, Resource bigger); + public abstract boolean fitsIn(Resource smaller, Resource bigger); /** * Check if resource has any major resource types (which are all NodeManagers diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java index a1d14fdce73..793aebfe0d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java @@ -18,104 +18,124 @@ package org.apache.hadoop.yarn.util.resource; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException; +import org.apache.hadoop.yarn.util.UnitsConversionUtil; -@InterfaceAudience.LimitedPrivate({"YARN", "MapReduce"}) +/** + * Resources is a computation class which provides a set of apis to do + * mathematical operations on Resource object. + */ +@InterfaceAudience.LimitedPrivate({ "YARN", "MapReduce" }) @Unstable public class Resources { - - // Java doesn't have const :( - private static final Resource NONE = new Resource() { + + private static final Log LOG = + LogFactory.getLog(Resources.class); + + /** + * Helper class to create a resource with a fixed value for all resource + * types. For example, a NONE resource which returns 0 for any resource type. + */ + @InterfaceAudience.Private + @Unstable + static class FixedValueResource extends Resource { + + private final long resourceValue; + private String name; + + /** + * Constructor for a fixed value resource. 
+ * @param rName the name of the resource + * @param value the fixed value to be returned for all resource types + */ + FixedValueResource(String rName, long value) { + this.resourceValue = value; + this.name = rName; + initResourceMap(); + } + + private int resourceValueToInt() { + if(this.resourceValue > Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + return Long.valueOf(this.resourceValue).intValue(); + } @Override @SuppressWarnings("deprecation") public int getMemory() { - return 0; + return resourceValueToInt(); } @Override public long getMemorySize() { - return 0; - } - - @Override - public void setMemorySize(long memory) { - throw new RuntimeException("NONE cannot be modified!"); + return this.resourceValue; } @Override @SuppressWarnings("deprecation") public void setMemory(int memory) { - throw new RuntimeException("NONE cannot be modified!"); - } - - @Override - public int getVirtualCores() { - return 0; - } - - @Override - public void setVirtualCores(int cores) { - throw new RuntimeException("NONE cannot be modified!"); - } - - @Override - public int compareTo(Resource o) { - long diff = 0 - o.getMemorySize(); - if (diff == 0) { - diff = 0 - o.getVirtualCores(); - } - return Long.signum(diff); - } - - }; - - private static final Resource UNBOUNDED = new Resource() { - - @Override - @SuppressWarnings("deprecation") - public int getMemory() { - return Integer.MAX_VALUE; - } - - @Override - public long getMemorySize() { - return Long.MAX_VALUE; - } - - @Override - @SuppressWarnings("deprecation") - public void setMemory(int memory) { - throw new RuntimeException("UNBOUNDED cannot be modified!"); + throw new RuntimeException(name + " cannot be modified!"); } @Override public void setMemorySize(long memory) { - throw new RuntimeException("UNBOUNDED cannot be modified!"); + throw new RuntimeException(name + " cannot be modified!"); } @Override public int getVirtualCores() { - return Integer.MAX_VALUE; + return resourceValueToInt(); } @Override - public void setVirtualCores(int cores) { - throw new RuntimeException("UNBOUNDED cannot be modified!"); + public void setVirtualCores(int virtualCores) { + throw new RuntimeException(name + " cannot be modified!"); } @Override - public int compareTo(Resource o) { - long diff = Long.MAX_VALUE - o.getMemorySize(); - if (diff == 0) { - diff = Integer.MAX_VALUE - o.getVirtualCores(); + public void setResourceInformation(int index, + ResourceInformation resourceInformation) + throws ResourceNotFoundException { + throw new RuntimeException(name + " cannot be modified!"); + } + + @Override + public void setResourceValue(int index, long value) + throws ResourceNotFoundException { + throw new RuntimeException(name + " cannot be modified!"); + } + + @Override + public void setResourceInformation(String resource, + ResourceInformation resourceInformation) + throws ResourceNotFoundException { + throw new RuntimeException(name + " cannot be modified!"); + } + + @Override + public void setResourceValue(String resource, long value) + throws ResourceNotFoundException { + throw new RuntimeException(name + " cannot be modified!"); + } + + private void initResourceMap() { + ResourceInformation[] types = ResourceUtils.getResourceTypesArray(); + if (types != null) { + resources = new ResourceInformation[types.length]; + for (int index = 0; index < types.length; index++) { + resources[index] = ResourceInformation.newInstance(types[index]); + resources[index].setValue(resourceValue); + } } - return Long.signum(diff); } - - }; + } public static Resource 
createResource(int memory) { return createResource(memory, (memory > 0) ? 1 : 0); @@ -125,6 +145,11 @@ public static Resource createResource(int memory, int cores) { return Resource.newInstance(memory, cores); } + private static final Resource UNBOUNDED = + new FixedValueResource("UNBOUNDED", Long.MAX_VALUE); + + private static final Resource NONE = new FixedValueResource("NONE", 0L); + public static Resource createResource(long memory) { return createResource(memory, (memory > 0) ? 1 : 0); } @@ -152,12 +177,26 @@ public static Resource unbounded() { } public static Resource clone(Resource res) { - return createResource(res.getMemorySize(), res.getVirtualCores()); + return Resource.newInstance(res); } public static Resource addTo(Resource lhs, Resource rhs) { - lhs.setMemorySize(lhs.getMemorySize() + rhs.getMemorySize()); - lhs.setVirtualCores(lhs.getVirtualCores() + rhs.getVirtualCores()); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + try { + ResourceInformation rhsValue = rhs.getResourceInformation(i); + ResourceInformation lhsValue = lhs.getResourceInformation(i); + + long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits())) + ? rhsValue.getValue() + : UnitsConversionUtil.convert(rhsValue.getUnits(), + lhsValue.getUnits(), rhsValue.getValue()); + lhs.setResourceValue(i, lhsValue.getValue() + convertedRhs); + } catch (ResourceNotFoundException ye) { + LOG.warn("Resource is missing:" + ye.getMessage()); + continue; + } + } return lhs; } @@ -166,8 +205,22 @@ public static Resource add(Resource lhs, Resource rhs) { } public static Resource subtractFrom(Resource lhs, Resource rhs) { - lhs.setMemorySize(lhs.getMemorySize() - rhs.getMemorySize()); - lhs.setVirtualCores(lhs.getVirtualCores() - rhs.getVirtualCores()); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + try { + ResourceInformation rhsValue = rhs.getResourceInformation(i); + ResourceInformation lhsValue = lhs.getResourceInformation(i); + + long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits())) + ? 
rhsValue.getValue() + : UnitsConversionUtil.convert(rhsValue.getUnits(), + lhsValue.getUnits(), rhsValue.getValue()); + lhs.setResourceValue(i, lhsValue.getValue() - convertedRhs); + } catch (ResourceNotFoundException ye) { + LOG.warn("Resource is missing:" + ye.getMessage()); + continue; + } + } return lhs; } @@ -198,8 +251,16 @@ public static Resource negate(Resource resource) { } public static Resource multiplyTo(Resource lhs, double by) { - lhs.setMemorySize((long)(lhs.getMemorySize() * by)); - lhs.setVirtualCores((int)(lhs.getVirtualCores() * by)); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + try { + ResourceInformation lhsValue = lhs.getResourceInformation(i); + lhs.setResourceValue(i, (long) (lhsValue.getValue() * by)); + } catch (ResourceNotFoundException ye) { + LOG.warn("Resource is missing:" + ye.getMessage()); + continue; + } + } return lhs; } @@ -213,9 +274,24 @@ public static Resource multiply(Resource lhs, double by) { */ public static Resource multiplyAndAddTo( Resource lhs, Resource rhs, double by) { - lhs.setMemorySize(lhs.getMemorySize() + (long)(rhs.getMemorySize() * by)); - lhs.setVirtualCores(lhs.getVirtualCores() - + (int)(rhs.getVirtualCores() * by)); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + try { + ResourceInformation rhsValue = rhs.getResourceInformation(i); + ResourceInformation lhsValue = lhs.getResourceInformation(i); + + long convertedRhs = (long) (((rhsValue.getUnits() + .equals(lhsValue.getUnits())) + ? rhsValue.getValue() + : UnitsConversionUtil.convert(rhsValue.getUnits(), + lhsValue.getUnits(), rhsValue.getValue())) + * by); + lhs.setResourceValue(i, lhsValue.getValue() + convertedRhs); + } catch (ResourceNotFoundException ye) { + LOG.warn("Resource is missing:" + ye.getMessage()); + continue; + } + } return lhs; } @@ -231,8 +307,16 @@ public static Resource multiplyAndNormalizeDown( public static Resource multiplyAndRoundDown(Resource lhs, double by) { Resource out = clone(lhs); - out.setMemorySize((long)(lhs.getMemorySize() * by)); - out.setVirtualCores((int)(lhs.getVirtualCores() * by)); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + try { + ResourceInformation lhsValue = lhs.getResourceInformation(i); + out.setResourceValue(i, (long) (lhsValue.getValue() * by)); + } catch (ResourceNotFoundException ye) { + LOG.warn("Resource is missing:" + ye.getMessage()); + continue; + } + } return out; } @@ -332,23 +416,78 @@ public static Resource max( } public static boolean fitsIn(Resource smaller, Resource bigger) { - return smaller.getMemorySize() <= bigger.getMemorySize() && - smaller.getVirtualCores() <= bigger.getVirtualCores(); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + try { + ResourceInformation rhsValue = bigger.getResourceInformation(i); + ResourceInformation lhsValue = smaller.getResourceInformation(i); + + long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits())) + ? 
rhsValue.getValue() + : UnitsConversionUtil.convert(rhsValue.getUnits(), + lhsValue.getUnits(), rhsValue.getValue()); + if (lhsValue.getValue() > convertedRhs) { + return false; + } + } catch (ResourceNotFoundException ye) { + LOG.warn("Resource is missing:" + ye.getMessage()); + continue; + } + } + return true; } - public static boolean fitsIn(ResourceCalculator rc, Resource cluster, + public static boolean fitsIn(ResourceCalculator rc, Resource smaller, Resource bigger) { - return rc.fitsIn(cluster, smaller, bigger); + return rc.fitsIn(smaller, bigger); } public static Resource componentwiseMin(Resource lhs, Resource rhs) { - return createResource(Math.min(lhs.getMemorySize(), rhs.getMemorySize()), - Math.min(lhs.getVirtualCores(), rhs.getVirtualCores())); + Resource ret = createResource(0); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + try { + ResourceInformation rhsValue = rhs.getResourceInformation(i); + ResourceInformation lhsValue = lhs.getResourceInformation(i); + + long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits())) + ? rhsValue.getValue() + : UnitsConversionUtil.convert(rhsValue.getUnits(), + lhsValue.getUnits(), rhsValue.getValue()); + ResourceInformation outInfo = lhsValue.getValue() < convertedRhs + ? lhsValue + : rhsValue; + ret.setResourceInformation(i, outInfo); + } catch (ResourceNotFoundException ye) { + LOG.warn("Resource is missing:" + ye.getMessage()); + continue; + } + } + return ret; } public static Resource componentwiseMax(Resource lhs, Resource rhs) { - return createResource(Math.max(lhs.getMemorySize(), rhs.getMemorySize()), - Math.max(lhs.getVirtualCores(), rhs.getVirtualCores())); + Resource ret = createResource(0); + int maxLength = ResourceUtils.getNumberOfKnownResourceTypes(); + for (int i = 0; i < maxLength; i++) { + try { + ResourceInformation rhsValue = rhs.getResourceInformation(i); + ResourceInformation lhsValue = lhs.getResourceInformation(i); + + long convertedRhs = (rhsValue.getUnits().equals(lhsValue.getUnits())) + ? rhsValue.getValue() + : UnitsConversionUtil.convert(rhsValue.getUnits(), + lhsValue.getUnits(), rhsValue.getValue()); + ResourceInformation outInfo = lhsValue.getValue() > convertedRhs + ? lhsValue + : rhsValue; + ret.setResourceInformation(i, outInfo); + } catch (ResourceNotFoundException ye) { + LOG.warn("Resource is missing:" + ye.getMessage()); + continue; + } + } + return ret; } public static boolean isAnyMajorResourceZero(ResourceCalculator rc, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 0cad1672ba9..0440458e5e2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -944,6 +944,22 @@ 600000 + + + Flag to enable/disable resource profiles + + yarn.resourcemanager.resource-profiles.enabled + false + + + + + If resource profiles is enabled, source file for the profiles + + yarn.resourcemanager.resource-profiles.source-file + resource-profiles.json + + @@ -987,6 +1003,12 @@ org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor + + Comma separated List of container state transition listeners. + yarn.nodemanager.container-state-transition-listener.classes + + + Number of threads container manager uses. 
yarn.nodemanager.container-manager.thread-count @@ -1571,6 +1593,14 @@ false + + Comma separated list of runtimes that are allowed when using + LinuxContainerExecutor. The allowed values are default, docker, and + javasandbox. + yarn.nodemanager.runtime.linux.allowed-runtimes + default + + This configuration setting determines the capabilities assigned to docker containers when they are launched. While these may not @@ -2993,6 +3023,15 @@ 100 + + + Use container pause as the preemption policy over kill in the container + queue at a NodeManager. + + yarn.nodemanager.opportunistic-containers-use-pause-for-preemption + false + + Error filename pattern, to identify the file in the container's @@ -3289,4 +3328,24 @@ false + + + + yarn.resource-types + + + The resource types to be used for scheduling. Use resource-types.xml + to specify details about the individual resource types. + + + + + yarn.resourcemanager.display.per-user-apps + false + + Flag to enable display of applications per user as an admin + configuration. + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java index 82170b31342..86946518db3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java @@ -37,6 +37,9 @@ public class BasePBImplRecordsTest { @SuppressWarnings("checkstyle:visibilitymodifier") protected static HashMap typeValueCache = new HashMap(); + @SuppressWarnings("checkstyle:visibilitymodifier") + protected static HashMap> excludedPropertiesMap = + new HashMap<>(); private static Random rand = new Random(); private static byte [] bytes = new byte[] {'1', '2', '3', '4'}; @@ -167,6 +170,10 @@ public String toString() { private Map getGetSetPairs(Class recordClass) throws Exception { Map ret = new HashMap(); + List excluded = null; + if (excludedPropertiesMap.containsKey(recordClass.getClass())) { + excluded = excludedPropertiesMap.get(recordClass.getClass()); + } Method [] methods = recordClass.getDeclaredMethods(); // get all get methods for (int i = 0; i < methods.length; i++) { @@ -224,6 +231,11 @@ private Map getGetSetPairs(Class recordClass) (gsp.setMethod == null)) { LOG.info(String.format("Exclude potential property: %s\n", gsp.propertyName)); itr.remove(); + } else if ((excluded != null && excluded.contains(gsp.propertyName))) { + LOG.info(String.format( + "Excluding potential property(present in exclusion list): %s\n", + gsp.propertyName)); + itr.remove(); } else { LOG.info(String.format("New property: %s type: %s", gsp.toString(), gsp.type)); gsp.testValue = genTypeValue(gsp.type); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java index a3f5491cdc0..c5585c28b21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java @@ -42,6 +42,9 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenResponsePBImpl; import 
org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationMasterRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationMasterResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceProfilesResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllResourceTypeInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptReportRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptReportResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationAttemptsRequestPBImpl; @@ -74,6 +77,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetResourceProfileRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetResourceProfileResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationRequestPBImpl; @@ -127,6 +132,7 @@ import org.apache.hadoop.yarn.api.records.PreemptionMessage; import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.QueueConfigurations; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueState; @@ -139,9 +145,11 @@ import org.apache.hadoop.yarn.api.records.ReservationRequests; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceAllocationRequest; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.api.records.ResourceUtilization; import org.apache.hadoop.yarn.api.records.SerializedException; import org.apache.hadoop.yarn.api.records.StrictPreemptionContract; @@ -174,18 +182,21 @@ import org.apache.hadoop.yarn.api.records.impl.pb.PreemptionMessagePBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.PreemptionResourceRequestPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.PriorityPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ProfileCapabilityPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.QueueInfoPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.QueueUserACLInfoPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourceBlacklistRequestPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourceOptionPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourceTypeInfoPBImpl; import 
org.apache.hadoop.yarn.api.records.impl.pb.SerializedExceptionPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.StrictPreemptionContractPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.TokenPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.URLPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.UpdateContainerRequestPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.YarnClusterMetricsPBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptReportProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; @@ -301,6 +312,10 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.StopContainersResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllResourceProfilesResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetResourceProfileResponseProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ProfileCapabilityProto; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.AddToClusterNodeLabelsRequestPBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.AddToClusterNodeLabelsResponsePBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.CheckForDecommissioningNodesRequestPBImpl; @@ -332,6 +347,7 @@ import org.junit.Test; import com.google.common.collect.ImmutableSet; +import java.util.Arrays; /** * Test class for YARN API protocol records. 
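The excludedPropertiesMap and getGetSetPairs changes above make the PB-record round-trip test skip named properties per record class: every getX with a matching setX becomes a tested property unless its name appears in the exclusion list registered for that class. The following is a minimal, self-contained sketch of that reflection-based pairing idea; the class and method names are illustrative only and are not the Hadoop test APIs.

import java.lang.reflect.Method;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;

// Illustrative sketch only: reflection-based get/set pairing with an
// exclusion list, mirroring the idea used by the test harness above.
final class GetSetPairingSketch {
  // Properties with both a getter and a setter, minus the excluded names.
  static Set<String> roundTrippableProperties(Class<?> recordClass,
      Set<String> excluded) {
    Set<String> getters = new HashSet<>();
    Set<String> setters = new HashSet<>();
    for (Method m : recordClass.getDeclaredMethods()) {
      String name = m.getName();
      if (name.startsWith("get") && m.getParameterTypes().length == 0) {
        getters.add(name.substring(3));
      } else if (name.startsWith("set") && m.getParameterTypes().length == 1) {
        setters.add(name.substring(3));
      }
    }
    Set<String> pairs = new TreeSet<>(getters);
    pairs.retainAll(setters);   // keep only get/set pairs
    pairs.removeAll(excluded);  // drop names in the exclusion list
    return pairs;
  }
}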
@@ -346,6 +362,8 @@ public static void setup() throws Exception { typeValueCache.put(SerializedException.class, SerializedException.newInstance(new IOException("exception for test"))); generateByNewInstance(ExecutionTypeRequest.class); + typeValueCache.put(ResourceInformation.class, ResourceInformation + .newInstance("localhost.test/sample", 1l)); generateByNewInstance(LogAggregationContext.class); generateByNewInstance(ApplicationId.class); generateByNewInstance(ApplicationAttemptId.class); @@ -359,6 +377,7 @@ public static void setup() throws Exception { generateByNewInstance(NodeReport.class); generateByNewInstance(Token.class); generateByNewInstance(NMToken.class); + generateByNewInstance(ProfileCapability.class); generateByNewInstance(ResourceRequest.class); generateByNewInstance(ApplicationAttemptReport.class); generateByNewInstance(ApplicationResourceUsageReport.class); @@ -408,6 +427,7 @@ public static void setup() throws Exception { generateByNewInstance(ApplicationTimeout.class); generateByNewInstance(QueueConfigurations.class); generateByNewInstance(CollectorInfo.class); + generateByNewInstance(ResourceTypeInfo.class); } @Test @@ -731,6 +751,8 @@ public void testApplicationReportPBImpl() throws Exception { @Test public void testApplicationResourceUsageReportPBImpl() throws Exception { + excludedPropertiesMap.put(ApplicationResourceUsageReportPBImpl.class.getClass(), + Arrays.asList("PreemptedResourceSecondsMap", "ResourceSecondsMap")); validatePBImplRecord(ApplicationResourceUsageReportPBImpl.class, ApplicationResourceUsageReportProto.class); } @@ -1153,4 +1175,46 @@ public void testExecutionTypeRequestPBImpl() throws Exception { validatePBImplRecord(ExecutionTypeRequestPBImpl.class, ExecutionTypeRequestProto.class); } + + @Test + public void testGetAllResourceProfilesResponsePBImpl() throws Exception { + validatePBImplRecord(GetAllResourceProfilesResponsePBImpl.class, + GetAllResourceProfilesResponseProto.class); + } + + @Test + public void testGetResourceProfileRequestPBImpl() throws Exception { + validatePBImplRecord(GetResourceProfileRequestPBImpl.class, + GetResourceProfileRequestProto.class); + } + + @Test + public void testGetResourceProfileResponsePBImpl() throws Exception { + validatePBImplRecord(GetResourceProfileResponsePBImpl.class, + GetResourceProfileResponseProto.class); + } + + @Test + public void testProfileCapabilityPBImpl() throws Exception { + validatePBImplRecord(ProfileCapabilityPBImpl.class, + ProfileCapabilityProto.class); + } + + @Test + public void testResourceTypesInfoPBImpl() throws Exception { + validatePBImplRecord(ResourceTypeInfoPBImpl.class, + YarnProtos.ResourceTypeInfoProto.class); + } + + @Test + public void testGetAllResourceTypesInfoRequestPBImpl() throws Exception { + validatePBImplRecord(GetAllResourceTypeInfoRequestPBImpl.class, + YarnServiceProtos.GetAllResourceTypeInfoRequestProto.class); + } + + @Test + public void testGetAllResourceTypesInfoResponsePBImpl() throws Exception { + validatePBImplRecord(GetAllResourceTypeInfoResponsePBImpl.class, + YarnServiceProtos.GetAllResourceTypeInfoResponseProto.class); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestResourcePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestResourcePBImpl.java new file mode 100644 index 00000000000..569a7b74f8b --- /dev/null +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestResourcePBImpl.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test class to handle various proto related tests for resources. + */ +public class TestResourcePBImpl { + @Test + public void testEmptyResourcePBInit() throws Exception { + Resource res = new ResourcePBImpl(); + // Assert to check it sets resource value and unit to default. + Assert.assertEquals(0, res.getMemorySize()); + Assert.assertEquals(ResourceInformation.MEMORY_MB.getUnits(), + res.getResourceInformation(ResourceInformation.MEMORY_MB.getName()) + .getUnits()); + Assert.assertEquals(ResourceInformation.VCORES.getUnits(), + res.getResourceInformation(ResourceInformation.VCORES.getName()) + .getUnits()); + } + + @Test + public void testResourcePBInitFromOldPB() throws Exception { + YarnProtos.ResourceProto proto = + YarnProtos.ResourceProto.newBuilder().setMemory(1024).setVirtualCores(3) + .build(); + // Assert to check it sets resource value and unit to default. + Resource res = new ResourcePBImpl(proto); + Assert.assertEquals(1024, res.getMemorySize()); + Assert.assertEquals(3, res.getVirtualCores()); + Assert.assertEquals(ResourceInformation.MEMORY_MB.getUnits(), + res.getResourceInformation(ResourceInformation.MEMORY_MB.getName()) + .getUnits()); + Assert.assertEquals(ResourceInformation.VCORES.getUnits(), + res.getResourceInformation(ResourceInformation.VCORES.getName()) + .getUnits()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexFileController.java new file mode 100644 index 00000000000..f77ad96d3ef --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/TestLogAggregationIndexFileController.java @@ -0,0 +1,314 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.logaggregation.filecontroller.ifile; + +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintStream; +import java.io.Writer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.logaggregation.ContainerLogMeta; +import org.apache.hadoop.yarn.logaggregation.ContainerLogsRequest; +import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils; +import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey; +import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue; +import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo; +import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileControllerContext; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Function test for {@link LogAggregationIndexFileController}. 
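The test class that follows drives the indexed log aggregation controller through a full write-then-read cycle. As a condensed view of that flow, here is a small sketch built only from calls that appear in the test below (initialize, initializeWriter, write, postWrite, closeWriter, readAggregatedLogsMeta, readAggregatedLogs); the wrapper class and method are illustrative, and the setup objects the test constructs are taken as parameters here.

import java.io.PrintStream;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue;
import org.apache.hadoop.yarn.logaggregation.ContainerLogMeta;
import org.apache.hadoop.yarn.logaggregation.ContainerLogsRequest;
import org.apache.hadoop.yarn.logaggregation.filecontroller.LogAggregationFileControllerContext;
import org.apache.hadoop.yarn.logaggregation.filecontroller.ifile.LogAggregationIndexedFileController;

// Condensed from the test below: the write path followed by the read path.
final class IndexedControllerFlowSketch {
  static boolean aggregateAndRead(Configuration conf,
      LogAggregationFileControllerContext context, LogKey key, LogValue value,
      ContainerLogsRequest request, PrintStream out) throws Exception {
    LogAggregationIndexedFileController fileFormat =
        new LogAggregationIndexedFileController();
    fileFormat.initialize(conf, "Indexed");
    fileFormat.initializeWriter(context);
    fileFormat.write(key, value);     // one LogKey per container
    fileFormat.postWrite(context);    // finalises this node's aggregated file
    fileFormat.closeWriter();
    List<ContainerLogMeta> meta = fileFormat.readAggregatedLogsMeta(request);
    return fileFormat.readAggregatedLogs(request, out) && !meta.isEmpty();
  }
}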
+ * + */ +public class TestLogAggregationIndexFileController { + + private final String rootLocalLogDir = "target/LocalLogs"; + private final Path rootLocalLogDirPath = new Path(rootLocalLogDir); + private final String remoteLogDir = "target/remote-app"; + private static final FsPermission LOG_FILE_UMASK = FsPermission + .createImmutable((short) (0777)); + private static final UserGroupInformation USER_UGI = UserGroupInformation + .createRemoteUser("testUser"); + private FileSystem fs; + private Configuration conf; + private ApplicationId appId; + private ContainerId containerId; + private NodeId nodeId; + + private ByteArrayOutputStream sysOutStream; + private PrintStream sysOut; + + private ByteArrayOutputStream sysErrStream; + private PrintStream sysErr; + + @Before + public void setUp() throws IOException { + appId = ApplicationId.newInstance(123456, 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance( + appId, 1); + containerId = ContainerId.newContainerId(attemptId, 1); + nodeId = NodeId.newInstance("localhost", 9999); + conf = new Configuration(); + conf.set("yarn.log-aggregation.Indexed.remote-app-log-dir", + remoteLogDir); + conf.set("yarn.log-aggregation.Indexed.remote-app-log-dir-suffix", + "logs"); + conf.set(YarnConfiguration.NM_LOG_AGG_COMPRESSION_TYPE, "gz"); + fs = FileSystem.get(conf); + sysOutStream = new ByteArrayOutputStream(); + sysOut = new PrintStream(sysOutStream); + System.setOut(sysOut); + + sysErrStream = new ByteArrayOutputStream(); + sysErr = new PrintStream(sysErrStream); + System.setErr(sysErr); + } + + @After + public void teardown() throws Exception { + fs.delete(rootLocalLogDirPath, true); + fs.delete(new Path(remoteLogDir), true); + } + + @Test(timeout = 15000) + public void testLogAggregationIndexFileFormat() throws Exception { + if (fs.exists(rootLocalLogDirPath)) { + fs.delete(rootLocalLogDirPath, true); + } + assertTrue(fs.mkdirs(rootLocalLogDirPath)); + + Path appLogsDir = new Path(rootLocalLogDirPath, appId.toString()); + if (fs.exists(appLogsDir)) { + fs.delete(appLogsDir, true); + } + assertTrue(fs.mkdirs(appLogsDir)); + + List logTypes = new ArrayList(); + logTypes.add("syslog"); + logTypes.add("stdout"); + logTypes.add("stderr"); + + Set files = new HashSet<>(); + + LogKey key1 = new LogKey(containerId.toString()); + + for(String logType : logTypes) { + File file = createAndWriteLocalLogFile(containerId, appLogsDir, + logType); + files.add(file); + } + LogValue value = mock(LogValue.class); + when(value.getPendingLogFilesToUploadForThisContainer()).thenReturn(files); + + LogAggregationIndexedFileController fileFormat + = new LogAggregationIndexedFileController(); + fileFormat.initialize(conf, "Indexed"); + + Map appAcls = new HashMap<>(); + Path appDir = fileFormat.getRemoteAppLogDir(appId, + USER_UGI.getShortUserName()); + if (fs.exists(appDir)) { + fs.delete(appDir, true); + } + assertTrue(fs.mkdirs(appDir)); + + Path logPath = fileFormat.getRemoteNodeLogFileForApp( + appId, USER_UGI.getShortUserName(), nodeId); + LogAggregationFileControllerContext context = + new LogAggregationFileControllerContext( + logPath, logPath, true, 1000, appId, appAcls, nodeId, USER_UGI); + // initialize the writer + fileFormat.initializeWriter(context); + + fileFormat.write(key1, value); + fileFormat.postWrite(context); + fileFormat.closeWriter(); + + ContainerLogsRequest logRequest = new ContainerLogsRequest(); + logRequest.setAppId(appId); + logRequest.setNodeId(nodeId.toString()); + logRequest.setAppOwner(USER_UGI.getShortUserName()); + 
logRequest.setContainerId(containerId.toString()); + logRequest.setBytes(Long.MAX_VALUE); + List meta = fileFormat.readAggregatedLogsMeta( + logRequest); + Assert.assertTrue(meta.size() == 1); + List fileNames = new ArrayList<>(); + for (ContainerLogMeta log : meta) { + Assert.assertTrue(log.getContainerId().equals(containerId.toString())); + Assert.assertTrue(log.getNodeId().equals(nodeId.toString())); + Assert.assertTrue(log.getContainerLogMeta().size() == 3); + for (ContainerLogFileInfo file : log.getContainerLogMeta()) { + fileNames.add(file.getFileName()); + } + } + fileNames.removeAll(logTypes); + Assert.assertTrue(fileNames.isEmpty()); + + boolean foundLogs = fileFormat.readAggregatedLogs(logRequest, System.out); + Assert.assertTrue(foundLogs); + for (String logType : logTypes) { + Assert.assertTrue(sysOutStream.toString().contains(logMessage( + containerId, logType))); + } + sysOutStream.reset(); + + // create a checksum file + Path checksumFile = new Path(fileFormat.getRemoteAppLogDir( + appId, USER_UGI.getShortUserName()), + LogAggregationUtils.getNodeString(nodeId) + + LogAggregationIndexedFileController.CHECK_SUM_FILE_SUFFIX); + FSDataOutputStream fInput = null; + try { + fInput = FileSystem.create(fs, checksumFile, LOG_FILE_UMASK); + fInput.writeLong(0); + } finally { + IOUtils.closeQuietly(fInput); + } + meta = fileFormat.readAggregatedLogsMeta( + logRequest); + Assert.assertTrue(meta.size() == 0); + foundLogs = fileFormat.readAggregatedLogs(logRequest, System.out); + Assert.assertFalse(foundLogs); + sysOutStream.reset(); + fs.delete(checksumFile, false); + Assert.assertFalse(fs.exists(checksumFile)); + + List newLogTypes = new ArrayList<>(logTypes); + files.clear(); + newLogTypes.add("test1"); + files.add(createAndWriteLocalLogFile(containerId, appLogsDir, + "test1")); + newLogTypes.add("test2"); + files.add(createAndWriteLocalLogFile(containerId, appLogsDir, + "test2")); + LogValue value2 = mock(LogValue.class); + when(value2.getPendingLogFilesToUploadForThisContainer()) + .thenReturn(files); + + // initialize the writer + fileFormat.initializeWriter(context); + fileFormat.write(key1, value2); + fileFormat.closeWriter(); + + // We did not call postWriter which we would keep the checksum file. + // We can only get the logs/logmeta from the first write. 
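// Assumed semantics, consistent with the assertions that follow: postWrite is
// what finalises a node's aggregated log file, and the per-node checksum file
// bounds how much of that file readers will trust. Because this write skipped
// postWrite, readAggregatedLogsMeta and readAggregatedLogs below only return
// data from the previously finalised write.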
+ fileFormat.readAggregatedLogsMeta( + logRequest); + Assert.assertEquals(meta.size(), meta.size(), 1); + for (ContainerLogMeta log : meta) { + Assert.assertTrue(log.getContainerId().equals(containerId.toString())); + Assert.assertTrue(log.getNodeId().equals(nodeId.toString())); + Assert.assertTrue(log.getContainerLogMeta().size() == 3); + for (ContainerLogFileInfo file : log.getContainerLogMeta()) { + fileNames.add(file.getFileName()); + } + } + fileNames.removeAll(logTypes); + Assert.assertTrue(fileNames.isEmpty()); + foundLogs = fileFormat.readAggregatedLogs(logRequest, System.out); + Assert.assertTrue(foundLogs); + for (String logType : logTypes) { + Assert.assertTrue(sysOutStream.toString().contains(logMessage( + containerId, logType))); + } + Assert.assertFalse(sysOutStream.toString().contains(logMessage( + containerId, "test1"))); + Assert.assertFalse(sysOutStream.toString().contains(logMessage( + containerId, "test2"))); + sysOutStream.reset(); + + // Call postWrite and we should get all logs/logmetas for both + // first write and second write + fileFormat.initializeWriter(context); + fileFormat.write(key1, value2); + fileFormat.postWrite(context); + fileFormat.closeWriter(); + fileFormat.readAggregatedLogsMeta( + logRequest); + Assert.assertEquals(meta.size(), meta.size(), 2); + for (ContainerLogMeta log : meta) { + Assert.assertTrue(log.getContainerId().equals(containerId.toString())); + Assert.assertTrue(log.getNodeId().equals(nodeId.toString())); + for (ContainerLogFileInfo file : log.getContainerLogMeta()) { + fileNames.add(file.getFileName()); + } + } + fileNames.removeAll(newLogTypes); + Assert.assertTrue(fileNames.isEmpty()); + foundLogs = fileFormat.readAggregatedLogs(logRequest, System.out); + Assert.assertTrue(foundLogs); + for (String logType : newLogTypes) { + Assert.assertTrue(sysOutStream.toString().contains(logMessage( + containerId, logType))); + } + sysOutStream.reset(); + } + + private File createAndWriteLocalLogFile(ContainerId containerId, + Path localLogDir, String logType) throws IOException { + File file = new File(localLogDir.toString(), logType); + if (file.exists()) { + file.delete(); + } + file.createNewFile(); + Writer writer = null; + try { + writer = new FileWriter(file); + writer.write(logMessage(containerId, logType)); + writer.close(); + return file; + } finally { + IOUtils.closeQuietly(writer); + } + } + + private String logMessage(ContainerId containerId, String logType) { + StringBuilder sb = new StringBuilder(); + sb.append("Hello " + containerId + " in " + logType + "!"); + return sb.toString(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java index b123b0520d4..5f3ed196048 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java @@ -21,15 +21,20 @@ import java.util.Arrays; import java.util.Collection; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; 
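The TestResourceCalculator changes that follow compare DefaultResourceCalculator, which looks only at memory, with DominantResourceCalculator, which compares shares of the cluster across all configured resources. As a reading aid for the compare assertions, here is an illustrative sketch of a dominant-share comparison that matches them; it is not the Hadoop calculator, it assumes a non-empty cluster, and the class name is hypothetical.

import java.util.Arrays;

// Illustrative dominant-share comparison: each value becomes a share of the
// cluster total, shares are sorted, and the vectors are compared starting
// from the dominant share, with the remaining shares breaking ties.
final class DominantShareSketch {
  static int compare(double[] cluster, double[] lhs, double[] rhs) {
    double[] l = sortedShares(cluster, lhs);
    double[] r = sortedShares(cluster, rhs);
    // Compare from the largest (dominant) share down to the smallest.
    for (int i = l.length - 1; i >= 0; i--) {
      int c = Double.compare(l[i], r[i]);
      if (c != 0) {
        return c;
      }
    }
    return 0;
  }

  private static double[] sortedShares(double[] cluster, double[] res) {
    double[] shares = new double[res.length];
    for (int i = 0; i < res.length; i++) {
      shares[i] = res[i] / cluster[i];  // assumes every cluster value is non-zero
    }
    Arrays.sort(shares);                // ascending; the dominant share is last
    return shares;
  }

  public static void main(String[] args) {
    double[] cluster = {4, 4};
    // Same dominant share (0.5) on both sides, so they compare equal.
    System.out.println(compare(cluster, new double[]{2, 1}, new double[]{1, 2})); // 0
    // Equal dominant shares, but the second share breaks the tie.
    System.out.println(compare(cluster, new double[]{3, 1}, new double[]{3, 0})); // 1
  }
}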
+import static org.junit.Assert.assertEquals; + @RunWith(Parameterized.class) public class TestResourceCalculator { - private ResourceCalculator resourceCalculator; + private final ResourceCalculator resourceCalculator; @Parameterized.Parameters public static Collection getParameters() { @@ -38,41 +43,199 @@ public static Collection getParameters() { { new DominantResourceCalculator() } }); } + @Before + public void setupNoExtraResource() { + // This has to run before each test because we don't know when + // setupExtraResource() might be called + ResourceUtils.resetResourceTypes(new Configuration()); + } + + private static void setupExtraResource() { + Configuration conf = new Configuration(); + + conf.set(YarnConfiguration.RESOURCE_TYPES, "test"); + ResourceUtils.resetResourceTypes(conf); + } + public TestResourceCalculator(ResourceCalculator rs) { this.resourceCalculator = rs; } @Test(timeout = 10000) public void testFitsIn() { - Resource cluster = Resource.newInstance(1024, 1); if (resourceCalculator instanceof DefaultResourceCalculator) { - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Assert.assertTrue(resourceCalculator.fitsIn( Resource.newInstance(1, 2), Resource.newInstance(2, 1))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Assert.assertTrue(resourceCalculator.fitsIn( Resource.newInstance(1, 2), Resource.newInstance(2, 2))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Assert.assertTrue(resourceCalculator.fitsIn( Resource.newInstance(1, 2), Resource.newInstance(1, 2))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Assert.assertTrue(resourceCalculator.fitsIn( Resource.newInstance(1, 2), Resource.newInstance(1, 1))); - Assert.assertFalse(resourceCalculator.fitsIn(cluster, + Assert.assertFalse(resourceCalculator.fitsIn( Resource.newInstance(2, 1), Resource.newInstance(1, 2))); } else if (resourceCalculator instanceof DominantResourceCalculator) { - Assert.assertFalse(resourceCalculator.fitsIn(cluster, + Assert.assertFalse(resourceCalculator.fitsIn( Resource.newInstance(1, 2), Resource.newInstance(2, 1))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Assert.assertTrue(resourceCalculator.fitsIn( Resource.newInstance(1, 2), Resource.newInstance(2, 2))); - Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Assert.assertTrue(resourceCalculator.fitsIn( Resource.newInstance(1, 2), Resource.newInstance(1, 2))); - Assert.assertFalse(resourceCalculator.fitsIn(cluster, + Assert.assertFalse(resourceCalculator.fitsIn( Resource.newInstance(1, 2), Resource.newInstance(1, 1))); - Assert.assertFalse(resourceCalculator.fitsIn(cluster, + Assert.assertFalse(resourceCalculator.fitsIn( Resource.newInstance(2, 1), Resource.newInstance(1, 2))); } } + private Resource newResource(long memory, int cpu) { + Resource res = Resource.newInstance(memory, cpu); + + return res; + } + + private Resource newResource(long memory, int cpu, int test) { + Resource res = newResource(memory, cpu); + + res.setResourceValue("test", test); + + return res; + } + + /** + * Test that the compare() method returns the expected result (0, -1, or 1). + * If the expected result is not 0, this method will also test the resources + * in the opposite order and check for the negative of the expected result. 
+ * + * @param cluster the cluster resource + * @param res1 the LHS resource + * @param res2 the RHS resource + * @param expected the expected result + */ + private void assertComparison(Resource cluster, Resource res1, Resource res2, + int expected) { + int actual = resourceCalculator.compare(cluster, res1, res2); + + assertEquals(String.format("Resource comparison did not give the expected " + + "result for %s v/s %s", res1.toString(), res2.toString()), + expected, actual); + + if (expected != 0) { + // Try again with args in the opposite order and the negative of the + // expected result. + actual = resourceCalculator.compare(cluster, res2, res1); + assertEquals(String.format("Resource comparison did not give the " + + "expected result for %s v/s %s", res2.toString(), res1.toString()), + expected * -1, actual); + } + } + + @Test + public void testCompareWithOnlyMandatory() { + // This test is necessary because there are optimizations that are only + // triggered when only the mandatory resources are configured. + + // Keep cluster resources even so that the numbers are easy to understand + Resource cluster = newResource(4, 4); + + assertComparison(cluster, newResource(1, 1), newResource(1, 1), 0); + assertComparison(cluster, newResource(0, 0), newResource(0, 0), 0); + assertComparison(cluster, newResource(2, 2), newResource(1, 1), 1); + assertComparison(cluster, newResource(2, 2), newResource(0, 0), 1); + + if (resourceCalculator instanceof DefaultResourceCalculator) { + testCompareDefaultWithOnlyMandatory(cluster); + } else if (resourceCalculator instanceof DominantResourceCalculator) { + testCompareDominantWithOnlyMandatory(cluster); + } + } + + private void testCompareDefaultWithOnlyMandatory(Resource cluster) { + assertComparison(cluster, newResource(1, 1), newResource(1, 1), 0); + assertComparison(cluster, newResource(1, 2), newResource(1, 1), 0); + assertComparison(cluster, newResource(1, 1), newResource(1, 0), 0); + assertComparison(cluster, newResource(2, 1), newResource(1, 1), 1); + assertComparison(cluster, newResource(2, 1), newResource(1, 2), 1); + assertComparison(cluster, newResource(2, 1), newResource(1, 0), 1); + } + + private void testCompareDominantWithOnlyMandatory(Resource cluster) { + assertComparison(cluster, newResource(2, 1), newResource(2, 1), 0); + assertComparison(cluster, newResource(2, 1), newResource(1, 2), 0); + assertComparison(cluster, newResource(2, 1), newResource(1, 1), 1); + assertComparison(cluster, newResource(2, 2), newResource(2, 1), 1); + assertComparison(cluster, newResource(2, 2), newResource(1, 2), 1); + assertComparison(cluster, newResource(3, 1), newResource(3, 0), 1); + } + + @Test + public void testCompare() { + // Test with 3 resources + setupExtraResource(); + + // Keep cluster resources even so that the numbers are easy to understand + Resource cluster = newResource(4L, 4, 4); + + assertComparison(cluster, newResource(1, 1, 1), newResource(1, 1, 1), 0); + assertComparison(cluster, newResource(0, 0, 0), newResource(0, 0, 0), 0); + assertComparison(cluster, newResource(2, 2, 2), newResource(1, 1, 1), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(0, 0, 0), 1); + + if (resourceCalculator instanceof DefaultResourceCalculator) { + testCompareDefault(cluster); + } else if (resourceCalculator instanceof DominantResourceCalculator) { + testCompareDominant(cluster); + } + } + + private void testCompareDefault(Resource cluster) { + assertComparison(cluster, newResource(1, 1, 2), newResource(1, 1, 1), 0); + 
assertComparison(cluster, newResource(1, 2, 1), newResource(1, 1, 1), 0); + assertComparison(cluster, newResource(1, 2, 2), newResource(1, 1, 1), 0); + assertComparison(cluster, newResource(1, 2, 2), newResource(1, 0, 0), 0); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 1), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 2, 1), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 2), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 2, 2), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 0, 0), 1); + } + + private void testCompareDominant(Resource cluster) { + assertComparison(cluster, newResource(2, 1, 1), newResource(2, 1, 1), 0); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 2, 1), 0); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 2), 0); + assertComparison(cluster, newResource(2, 1, 0), newResource(0, 1, 2), 0); + assertComparison(cluster, newResource(2, 2, 1), newResource(1, 2, 2), 0); + assertComparison(cluster, newResource(2, 2, 1), newResource(2, 1, 2), 0); + assertComparison(cluster, newResource(2, 2, 1), newResource(2, 2, 1), 0); + assertComparison(cluster, newResource(2, 2, 0), newResource(2, 0, 2), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(3, 2, 1), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(3, 1, 2), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(1, 2, 3), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(1, 3, 2), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(2, 1, 3), 0); + assertComparison(cluster, newResource(3, 2, 1), newResource(2, 3, 1), 0); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 1), 1); + assertComparison(cluster, newResource(2, 1, 1), newResource(1, 1, 0), 1); + assertComparison(cluster, newResource(2, 2, 1), newResource(2, 1, 1), 1); + assertComparison(cluster, newResource(2, 2, 1), newResource(1, 2, 1), 1); + assertComparison(cluster, newResource(2, 2, 1), newResource(1, 1, 2), 1); + assertComparison(cluster, newResource(2, 2, 1), newResource(0, 2, 2), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(2, 1, 1), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(1, 2, 1), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(1, 1, 2), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(2, 2, 1), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(2, 1, 2), 1); + assertComparison(cluster, newResource(2, 2, 2), newResource(1, 2, 2), 1); + assertComparison(cluster, newResource(3, 2, 1), newResource(2, 2, 2), 1); + assertComparison(cluster, newResource(3, 1, 1), newResource(2, 2, 2), 1); + assertComparison(cluster, newResource(3, 1, 1), newResource(3, 1, 0), 1); + assertComparison(cluster, newResource(3, 1, 1), newResource(3, 0, 0), 1); + } + @Test(timeout = 10000) - public void testResourceCalculatorCompareMethod() { + public void testCompareWithEmptyCluster() { Resource clusterResource = Resource.newInstance(0, 0); // For lhs == rhs @@ -126,27 +289,27 @@ private void assertResourcesOperations(Resource clusterResource, boolean greaterThan, boolean greaterThanOrEqual, Resource max, Resource min) { - Assert.assertEquals("Less Than operation is wrongly calculated.", lessThan, + assertEquals("Less Than operation is wrongly calculated.", lessThan, Resources.lessThan(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals( 
+ assertEquals( "Less Than Or Equal To operation is wrongly calculated.", lessThanOrEqual, Resources.lessThanOrEqual(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals("Greater Than operation is wrongly calculated.", + assertEquals("Greater Than operation is wrongly calculated.", greaterThan, Resources.greaterThan(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals( + assertEquals( "Greater Than Or Equal To operation is wrongly calculated.", greaterThanOrEqual, Resources.greaterThanOrEqual(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals("Max(value) Operation wrongly calculated.", max, + assertEquals("Max(value) Operation wrongly calculated.", max, Resources.max(resourceCalculator, clusterResource, lhs, rhs)); - Assert.assertEquals("Min(value) operation is wrongly calculated.", min, + assertEquals("Min(value) operation is wrongly calculated.", min, Resources.min(resourceCalculator, clusterResource, lhs, rhs)); } @@ -164,13 +327,13 @@ public void testNormalize() { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2 * 1024, result.getMemorySize()); } else if (resourceCalculator instanceof DominantResourceCalculator) { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); - Assert.assertEquals(4, result.getVirtualCores()); + assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(4, result.getVirtualCores()); } // if resources asked are less than minimum resource, then normalize it to @@ -183,13 +346,13 @@ public void testNormalize() { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2 * 1024, result.getMemorySize()); } else if (resourceCalculator instanceof DominantResourceCalculator) { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); - Assert.assertEquals(2, result.getVirtualCores()); + assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2, result.getVirtualCores()); } // if resources asked are larger than maximum resource, then normalize it to @@ -202,13 +365,13 @@ public void testNormalize() { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(8 * 1024, result.getMemorySize()); + assertEquals(8 * 1024, result.getMemorySize()); } else if (resourceCalculator instanceof DominantResourceCalculator) { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(8 * 1024, result.getMemorySize()); - Assert.assertEquals(8, result.getVirtualCores()); + assertEquals(8 * 1024, result.getMemorySize()); + assertEquals(8, result.getVirtualCores()); } // if increment is 0, use minimum resource as the increment resource. 
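The surrounding testNormalize cases state the normalization rule only in comments: an ask below the minimum is raised to the minimum, the result is rounded up to a multiple of the increment, anything above the maximum is capped, and a zero increment falls back to the minimum. Here is a sketch of that rule under those assumed semantics, for a single value rather than the per-resource calculation Resources.normalize performs through a ResourceCalculator.

// Sketch of the normalization rule described by the surrounding comments;
// not the Hadoop implementation.
final class NormalizeSketch {
  static long normalize(long ask, long min, long max, long increment) {
    long step = (increment <= 0) ? min : increment;      // zero increment -> use min
    long raised = Math.max(ask, min);                    // never below the minimum
    long rounded = ((raised + step - 1) / step) * step;  // round up to a multiple of step
    return Math.min(rounded, max);                       // never above the maximum
  }

  public static void main(String[] args) {
    // An ask below min is raised to min; an ask above max is capped at max.
    System.out.println(normalize(512, 2 * 1024, 8 * 1024, 1024));  // 2048
    System.out.println(normalize(9 * 1024, 1024, 8 * 1024, 1024)); // 8192
  }
}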
@@ -220,13 +383,13 @@ public void testNormalize() { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2 * 1024, result.getMemorySize()); } else if (resourceCalculator instanceof DominantResourceCalculator) { Resource result = Resources.normalize(resourceCalculator, ask, min, max, increment); - Assert.assertEquals(2 * 1024, result.getMemorySize()); - Assert.assertEquals(2, result.getVirtualCores()); + assertEquals(2 * 1024, result.getMemorySize()); + assertEquals(2, result.getVirtualCores()); } } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java new file mode 100644 index 00000000000..a5550a70f22 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceUtils.java @@ -0,0 +1,306 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util.resource; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + +/** + * Test class to verify all resource utility methods. 
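The TestResourceUtils class added below verifies how extra resource types are picked up from resource-types.xml and node-resources.xml. For orientation, here is a short sketch of the equivalent programmatic setup used elsewhere in these tests, assuming the returned map is keyed by resource name with ResourceInformation values, as the assertions indicate; the wrapper class name is hypothetical.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;

// Declaring two additional resource types programmatically; the same keys are
// what resource-types.xml files such as resource-types-4.xml carry.
final class ResourceTypesSetupSketch {
  // Assumption: resetResourceTypes returns the resource-name -> info map.
  static Map<String, ResourceInformation> declareTypes() {
    Configuration conf = new YarnConfiguration();
    conf.set(YarnConfiguration.RESOURCE_TYPES, "resource1,resource2");
    conf.set(YarnConfiguration.RESOURCE_TYPES + ".resource1.units", "G");
    conf.set(YarnConfiguration.RESOURCE_TYPES + ".resource2.units", "m");
    // memory-mb (units "Mi") and vcores are always added to the returned map.
    return ResourceUtils.resetResourceTypes(conf);
  }
}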
+ */ +public class TestResourceUtils { + + static class ResourceFileInformation { + String filename; + int resourceCount; + Map resourceNameUnitsMap; + + public ResourceFileInformation(String name, int count) { + filename = name; + resourceCount = count; + resourceNameUnitsMap = new HashMap<>(); + } + } + + @Before + public void setup() { + ResourceUtils.resetResourceTypes(); + } + + @After + public void teardown() { + Configuration conf = new YarnConfiguration(); + File source = new File( + conf.getClassLoader().getResource("resource-types-1.xml").getFile()); + File dest = new File(source.getParent(), "resource-types.xml"); + if (dest.exists()) { + dest.delete(); + } + } + + private void testMemoryAndVcores(Map res) { + String memory = ResourceInformation.MEMORY_MB.getName(); + String vcores = ResourceInformation.VCORES.getName(); + Assert.assertTrue("Resource 'memory' missing", res.containsKey(memory)); + Assert.assertEquals("'memory' units incorrect", + ResourceInformation.MEMORY_MB.getUnits(), res.get(memory).getUnits()); + Assert.assertEquals("'memory' types incorrect", + ResourceInformation.MEMORY_MB.getResourceType(), + res.get(memory).getResourceType()); + Assert.assertTrue("Resource 'vcores' missing", res.containsKey(vcores)); + Assert.assertEquals("'vcores' units incorrect", + ResourceInformation.VCORES.getUnits(), res.get(vcores).getUnits()); + Assert.assertEquals("'vcores' type incorrect", + ResourceInformation.VCORES.getResourceType(), + res.get(vcores).getResourceType()); + } + + @Test + public void testGetResourceTypes() throws Exception { + + Map res = ResourceUtils.getResourceTypes(); + Assert.assertEquals(2, res.size()); + testMemoryAndVcores(res); + } + + @Test + public void testGetResourceTypesConfigs() throws Exception { + + Configuration conf = new YarnConfiguration(); + + ResourceFileInformation testFile1 = + new ResourceFileInformation("resource-types-1.xml", 2); + ResourceFileInformation testFile2 = + new ResourceFileInformation("resource-types-2.xml", 3); + testFile2.resourceNameUnitsMap.put("resource1", "G"); + ResourceFileInformation testFile3 = + new ResourceFileInformation("resource-types-3.xml", 3); + testFile3.resourceNameUnitsMap.put("resource2", ""); + ResourceFileInformation testFile4 = + new ResourceFileInformation("resource-types-4.xml", 4); + testFile4.resourceNameUnitsMap.put("resource1", "G"); + testFile4.resourceNameUnitsMap.put("resource2", "m"); + + ResourceFileInformation[] tests = {testFile1, testFile2, testFile3, + testFile4}; + Map res; + for (ResourceFileInformation testInformation : tests) { + ResourceUtils.resetResourceTypes(); + File source = new File( + conf.getClassLoader().getResource(testInformation.filename) + .getFile()); + File dest = new File(source.getParent(), "resource-types.xml"); + FileUtils.copyFile(source, dest); + res = ResourceUtils.getResourceTypes(); + testMemoryAndVcores(res); + Assert.assertEquals(testInformation.resourceCount, res.size()); + for (Map.Entry entry : testInformation.resourceNameUnitsMap + .entrySet()) { + String resourceName = entry.getKey(); + Assert.assertTrue("Missing key " + resourceName, + res.containsKey(resourceName)); + Assert.assertEquals(entry.getValue(), res.get(resourceName).getUnits()); + } + dest.delete(); + } + } + + @Test + public void testGetResourceTypesConfigErrors() throws Exception { + Configuration conf = new YarnConfiguration(); + + String[] resourceFiles = {"resource-types-error-1.xml", + "resource-types-error-2.xml", "resource-types-error-3.xml", + "resource-types-error-4.xml"}; 
+ for (String resourceFile : resourceFiles) { + ResourceUtils.resetResourceTypes(); + File dest = null; + try { + File source = + new File(conf.getClassLoader().getResource(resourceFile).getFile()); + dest = new File(source.getParent(), "resource-types.xml"); + FileUtils.copyFile(source, dest); + ResourceUtils.getResourceTypes(); + Assert.fail("Expected error with file " + resourceFile); + } catch (NullPointerException ne) { + throw ne; + } catch (Exception e) { + if (dest != null) { + dest.delete(); + } + } + } + } + + @Test + public void testInitializeResourcesMap() throws Exception { + String[] empty = {"", ""}; + String[] res1 = {"resource1", "m"}; + String[] res2 = {"resource2", "G"}; + String[][] test1 = {empty}; + String[][] test2 = {res1}; + String[][] test3 = {res2}; + String[][] test4 = {res1, res2}; + + String[][][] allTests = {test1, test2, test3, test4}; + + for (String[][] test : allTests) { + + Configuration conf = new YarnConfiguration(); + String resSt = ""; + for (String[] resources : test) { + resSt += (resources[0] + ","); + } + resSt = resSt.substring(0, resSt.length() - 1); + conf.set(YarnConfiguration.RESOURCE_TYPES, resSt); + for (String[] resources : test) { + String name = + YarnConfiguration.RESOURCE_TYPES + "." + resources[0] + ".units"; + conf.set(name, resources[1]); + } + Map ret = + ResourceUtils.resetResourceTypes(conf); + + // for test1, 4 - length will be 1, 4 + // for the others, len will be 3 + int len = 3; + if (test == test1) { + len = 2; + } else if (test == test4) { + len = 4; + } + + Assert.assertEquals(len, ret.size()); + for (String[] resources : test) { + if (resources[0].length() == 0) { + continue; + } + Assert.assertTrue(ret.containsKey(resources[0])); + ResourceInformation resInfo = ret.get(resources[0]); + Assert.assertEquals(resources[1], resInfo.getUnits()); + Assert.assertEquals(ResourceTypes.COUNTABLE, resInfo.getResourceType()); + } + // we must always have memory and vcores with their fixed units + Assert.assertTrue(ret.containsKey("memory-mb")); + ResourceInformation memInfo = ret.get("memory-mb"); + Assert.assertEquals("Mi", memInfo.getUnits()); + Assert.assertEquals(ResourceTypes.COUNTABLE, memInfo.getResourceType()); + Assert.assertTrue(ret.containsKey("vcores")); + ResourceInformation vcoresInfo = ret.get("vcores"); + Assert.assertEquals("", vcoresInfo.getUnits()); + Assert + .assertEquals(ResourceTypes.COUNTABLE, vcoresInfo.getResourceType()); + } + } + + @Test + public void testInitializeResourcesMapErrors() throws Exception { + + String[] mem1 = {"memory-mb", ""}; + String[] vcores1 = {"vcores", "M"}; + + String[] mem2 = {"memory-mb", "m"}; + String[] vcores2 = {"vcores", "G"}; + + String[] mem3 = {"memory", ""}; + + String[][] test1 = {mem1, vcores1}; + String[][] test2 = {mem2, vcores2}; + String[][] test3 = {mem3}; + + String[][][] allTests = {test1, test2, test3}; + + for (String[][] test : allTests) { + + Configuration conf = new YarnConfiguration(); + String resSt = ""; + for (String[] resources : test) { + resSt += (resources[0] + ","); + } + resSt = resSt.substring(0, resSt.length() - 1); + conf.set(YarnConfiguration.RESOURCE_TYPES, resSt); + for (String[] resources : test) { + String name = + YarnConfiguration.RESOURCE_TYPES + "." 
+ resources[0] + ".units"; + conf.set(name, resources[1]); + } + try { + ResourceUtils.initializeResourcesMap(conf); + Assert.fail("resource map initialization should fail"); + } catch (Exception e) { + // do nothing + } + } + } + + @Test + public void testGetResourceInformation() throws Exception { + + Configuration conf = new YarnConfiguration(); + Map testRun = new HashMap<>(); + setupResourceTypes(conf, "resource-types-4.xml"); + // testRun.put("node-resources-1.xml", Resource.newInstance(1024, 1)); + Resource test3Resources = Resource.newInstance(1024, 1); + test3Resources.setResourceInformation("resource1", + ResourceInformation.newInstance("resource1", "Gi", 5L)); + test3Resources.setResourceInformation("resource2", + ResourceInformation.newInstance("resource2", "m", 2L)); + testRun.put("node-resources-2.xml", test3Resources); + + for (Map.Entry entry : testRun.entrySet()) { + String resourceFile = entry.getKey(); + ResourceUtils.resetNodeResources(); + File dest; + File source = new File( + conf.getClassLoader().getResource(resourceFile).getFile()); + dest = new File(source.getParent(), "node-resources.xml"); + FileUtils.copyFile(source, dest); + Map actual = ResourceUtils + .getNodeResourceInformation(conf); + Assert.assertEquals(actual.size(), + entry.getValue().getResources().length); + for (ResourceInformation resInfo : entry.getValue().getResources()) { + Assert.assertEquals(resInfo, actual.get(resInfo.getName())); + } + dest.delete(); + } + } + + public static String setupResourceTypes(Configuration conf, String filename) + throws Exception { + File source = new File( + conf.getClassLoader().getResource(filename).getFile()); + File dest = new File(source.getParent(), "resource-types.xml"); + FileUtils.copyFile(source, dest); + ResourceUtils.getResourceTypes(); + return dest.getAbsolutePath(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResources.java index d79179ac0d9..a8404fbaee7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResources.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResources.java @@ -18,35 +18,102 @@ package org.apache.hadoop.yarn.util.resource; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.After; +import org.junit.Before; import org.junit.Test; +import java.io.File; + +import static org.apache.hadoop.yarn.util.resource.Resources.componentwiseMin; +import static org.apache.hadoop.yarn.util.resource.Resources.componentwiseMax; +import static org.apache.hadoop.yarn.util.resource.Resources.add; +import static org.apache.hadoop.yarn.util.resource.Resources.subtract; +import static org.apache.hadoop.yarn.util.resource.Resources.multiply; +import static org.apache.hadoop.yarn.util.resource.Resources.multiplyAndAddTo; +import static org.apache.hadoop.yarn.util.resource.Resources.multiplyAndRoundDown; +import static org.apache.hadoop.yarn.util.resource.Resources.fitsIn; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; public class TestResources { - + + 
static class ExtendedResources extends Resources { + public static Resource unbounded() { + return new FixedValueResource("UNBOUNDED", Long.MAX_VALUE); + } + + public static Resource none() { + return new FixedValueResource("NONE", 0L); + } + } + + private static final String EXTRA_RESOURCE_TYPE = "resource2"; + private String resourceTypesFile; + + private void setupExtraResourceType() throws Exception { + Configuration conf = new YarnConfiguration(); + resourceTypesFile = + TestResourceUtils.setupResourceTypes(conf, "resource-types-3.xml"); + } + + private void unsetExtraResourceType() { + deleteResourceTypesFile(); + ResourceUtils.resetResourceTypes(); + } + + private void deleteResourceTypesFile() { + if (resourceTypesFile != null && !resourceTypesFile.isEmpty()) { + File resourceFile = new File(resourceTypesFile); + resourceFile.delete(); + } + } + + @Before + public void setup() throws Exception { + setupExtraResourceType(); + } + + @After + public void teardown() { + deleteResourceTypesFile(); + } + public Resource createResource(long memory, int vCores) { return Resource.newInstance(memory, vCores); } - @Test(timeout=10000) - public void testCompareToWithUnboundedResource() { - assertTrue(Resources.unbounded().compareTo( - createResource(Long.MAX_VALUE, Integer.MAX_VALUE)) == 0); - assertTrue(Resources.unbounded().compareTo( - createResource(Long.MAX_VALUE, 0)) > 0); - assertTrue(Resources.unbounded().compareTo( - createResource(0, Integer.MAX_VALUE)) > 0); + public Resource createResource(long memory, int vCores, long resource2) { + Resource ret = Resource.newInstance(memory, vCores); + ret.setResourceInformation(EXTRA_RESOURCE_TYPE, + ResourceInformation.newInstance(EXTRA_RESOURCE_TYPE, resource2)); + return ret; } - @Test(timeout=10000) + @Test(timeout = 10000) + public void testCompareToWithUnboundedResource() { + unsetExtraResourceType(); + Resource unboundedClone = Resources.clone(ExtendedResources.unbounded()); + assertTrue(unboundedClone + .compareTo(createResource(Long.MAX_VALUE, Integer.MAX_VALUE)) == 0); + assertTrue(unboundedClone.compareTo(createResource(Long.MAX_VALUE, 0)) > 0); + assertTrue( + unboundedClone.compareTo(createResource(0, Integer.MAX_VALUE)) > 0); + } + + @Test(timeout = 10000) public void testCompareToWithNoneResource() { assertTrue(Resources.none().compareTo(createResource(0, 0)) == 0); - assertTrue(Resources.none().compareTo( - createResource(1, 0)) < 0); - assertTrue(Resources.none().compareTo( - createResource(0, 1)) < 0); + assertTrue(Resources.none().compareTo(createResource(1, 0)) < 0); + assertTrue(Resources.none().compareTo(createResource(0, 1)) < 0); + assertTrue(Resources.none().compareTo(createResource(0, 0, 0)) == 0); + assertTrue(Resources.none().compareTo(createResource(1, 0, 0)) < 0); + assertTrue(Resources.none().compareTo(createResource(0, 1, 0)) < 0); + assertTrue(Resources.none().compareTo(createResource(0, 0, 1)) < 0); } @Test(timeout=10000) @@ -69,4 +136,131 @@ public void testMultipleRoundUp() { assertEquals(memoryErrorMsg, result.getMemorySize(), 0); assertEquals(vcoreErrorMsg, result.getVirtualCores(), 0); } + + @Test(timeout = 1000) + public void testFitsIn() { + assertTrue(fitsIn(createResource(1, 1), createResource(2, 2))); + assertTrue(fitsIn(createResource(2, 2), createResource(2, 2))); + assertFalse(fitsIn(createResource(2, 2), createResource(1, 1))); + assertFalse(fitsIn(createResource(1, 2), createResource(2, 1))); + assertFalse(fitsIn(createResource(2, 1), createResource(1, 2))); + assertTrue(fitsIn(createResource(1, 
1, 1), createResource(2, 2, 2))); + assertTrue(fitsIn(createResource(1, 1, 0), createResource(2, 2, 0))); + assertTrue(fitsIn(createResource(1, 1, 1), createResource(2, 2, 2))); + } + + @Test(timeout = 1000) + public void testComponentwiseMin() { + assertEquals(createResource(1, 1), + componentwiseMin(createResource(1, 1), createResource(2, 2))); + assertEquals(createResource(1, 1), + componentwiseMin(createResource(2, 2), createResource(1, 1))); + assertEquals(createResource(1, 1), + componentwiseMin(createResource(1, 2), createResource(2, 1))); + assertEquals(createResource(1, 1, 1), + componentwiseMin(createResource(1, 1, 1), createResource(2, 2, 2))); + assertEquals(createResource(1, 1, 0), + componentwiseMin(createResource(2, 2, 2), createResource(1, 1))); + assertEquals(createResource(1, 1, 2), + componentwiseMin(createResource(1, 2, 2), createResource(2, 1, 3))); + } + + @Test + public void testComponentwiseMax() { + assertEquals(createResource(2, 2), + componentwiseMax(createResource(1, 1), createResource(2, 2))); + assertEquals(createResource(2, 2), + componentwiseMax(createResource(2, 2), createResource(1, 1))); + assertEquals(createResource(2, 2), + componentwiseMax(createResource(1, 2), createResource(2, 1))); + assertEquals(createResource(2, 2, 2), + componentwiseMax(createResource(1, 1, 1), createResource(2, 2, 2))); + assertEquals(createResource(2, 2, 2), + componentwiseMax(createResource(2, 2, 2), createResource(1, 1))); + assertEquals(createResource(2, 2, 3), + componentwiseMax(createResource(1, 2, 2), createResource(2, 1, 3))); + assertEquals(createResource(2, 2, 1), + componentwiseMax(createResource(2, 2, 0), createResource(2, 1, 1))); + } + + @Test + public void testAdd() { + assertEquals(createResource(2, 3), + add(createResource(1, 1), createResource(1, 2))); + assertEquals(createResource(3, 2), + add(createResource(1, 1), createResource(2, 1))); + assertEquals(createResource(2, 2, 0), + add(createResource(1, 1, 0), createResource(1, 1, 0))); + assertEquals(createResource(2, 2, 3), + add(createResource(1, 1, 1), createResource(1, 1, 2))); + } + + @Test + public void testSubtract() { + assertEquals(createResource(1, 0), + subtract(createResource(2, 1), createResource(1, 1))); + assertEquals(createResource(0, 1), + subtract(createResource(1, 2), createResource(1, 1))); + assertEquals(createResource(2, 2, 0), + subtract(createResource(3, 3, 0), createResource(1, 1, 0))); + assertEquals(createResource(1, 1, 2), + subtract(createResource(2, 2, 3), createResource(1, 1, 1))); + } + + @Test + public void testClone() { + assertEquals(createResource(1, 1), Resources.clone(createResource(1, 1))); + assertEquals(createResource(1, 1, 0), + Resources.clone(createResource(1, 1))); + assertEquals(createResource(1, 1), + Resources.clone(createResource(1, 1, 0))); + assertEquals(createResource(1, 1, 2), + Resources.clone(createResource(1, 1, 2))); + } + + @Test + public void testMultiply() { + assertEquals(createResource(4, 2), multiply(createResource(2, 1), 2)); + assertEquals(createResource(4, 2, 0), multiply(createResource(2, 1), 2)); + assertEquals(createResource(2, 4), multiply(createResource(1, 2), 2)); + assertEquals(createResource(2, 4, 0), multiply(createResource(1, 2), 2)); + assertEquals(createResource(6, 6, 0), multiply(createResource(3, 3, 0), 2)); + assertEquals(createResource(4, 4, 6), multiply(createResource(2, 2, 3), 2)); + } + + @Test + public void testMultiplyAndRoundDown() { + assertEquals(createResource(4, 1), + multiplyAndRoundDown(createResource(3, 1), 1.5)); + 
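// Assumed per-component semantics behind the assertions here and in
// testMultiplyAndAddTo below: multiplyAndRoundDown(r, f) takes
// floor(value * f) for every configured resource, so (3, 1) * 1.5 gives
// (4, 1); an extra resource that is never set behaves as 0, which is why
// (4, 1) and (4, 1, 0) are interchangeable. multiplyAndAddTo(lhs, rhs, f)
// adds rhs * f into lhs component by component, e.g. (3, 1) + (2, 2) * 1.5
// = (6, 4).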
assertEquals(createResource(4, 1, 0), + multiplyAndRoundDown(createResource(3, 1), 1.5)); + assertEquals(createResource(1, 4), + multiplyAndRoundDown(createResource(1, 3), 1.5)); + assertEquals(createResource(1, 4, 0), + multiplyAndRoundDown(createResource(1, 3), 1.5)); + assertEquals(createResource(7, 7, 0), + multiplyAndRoundDown(createResource(3, 3, 0), 2.5)); + assertEquals(createResource(2, 2, 7), + multiplyAndRoundDown(createResource(1, 1, 3), 2.5)); + } + + @Test + public void testMultiplyAndAddTo() throws Exception { + unsetExtraResourceType(); + setupExtraResourceType(); + assertEquals(createResource(6, 4), + multiplyAndAddTo(createResource(3, 1), createResource(2, 2), 1.5)); + assertEquals(createResource(6, 4, 0), + multiplyAndAddTo(createResource(3, 1), createResource(2, 2), 1.5)); + assertEquals(createResource(4, 7), + multiplyAndAddTo(createResource(1, 1), createResource(2, 4), 1.5)); + assertEquals(createResource(4, 7, 0), + multiplyAndAddTo(createResource(1, 1), createResource(2, 4), 1.5)); + assertEquals(createResource(6, 4, 0), + multiplyAndAddTo(createResource(3, 1, 0), createResource(2, 2, 0), + 1.5)); + assertEquals(createResource(6, 4, 6), + multiplyAndAddTo(createResource(3, 1, 2), createResource(2, 2, 3), + 1.5)); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-1.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-1.xml new file mode 100644 index 00000000000..f00573e3077 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-1.xml @@ -0,0 +1,29 @@ + + + + + + + + yarn.nodemanager.resource.memory-mb + 1024 + + + + yarn.nodemanager.resource.vcores + 1 + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-2.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-2.xml new file mode 100644 index 00000000000..9d9b3dc65c8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/node-resources-2.xml @@ -0,0 +1,39 @@ + + + + + + + + yarn.nodemanager.resource-type.memory-mb + 1024Mi + + + + yarn.nodemanager.resource-type.vcores + 1 + + + + yarn.nodemanager.resource-type.resource1 + 5Gi + + + + yarn.nodemanager.resource-type.resource2 + 2m + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-1.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-1.xml new file mode 100644 index 00000000000..3ec106dfbb2 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-1.xml @@ -0,0 +1,18 @@ + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-2.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-2.xml new file mode 100644 index 00000000000..6e5885ed7d7 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-2.xml @@ -0,0 +1,29 @@ + + + + + + + + yarn.resource-types + resource1 + + + + yarn.resource-types.resource1.units + G + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-3.xml 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-3.xml new file mode 100644 index 00000000000..8fd6fefa8f1 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-3.xml @@ -0,0 +1,24 @@ + + + + + + + + yarn.resource-types + resource2 + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-4.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-4.xml new file mode 100644 index 00000000000..c84316a536e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-4.xml @@ -0,0 +1,34 @@ + + + + + + + + yarn.resource-types + resource1,resource2 + + + + yarn.resource-types.resource1.units + G + + + + yarn.resource-types.resource2.units + m + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-1.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-1.xml new file mode 100644 index 00000000000..d1942f2c97f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-1.xml @@ -0,0 +1,29 @@ + + + + + + + + yarn.resource-types + memory-mb,resource1 + + + + yarn.resource-types.resource1.calculator-units + G + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-2.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-2.xml new file mode 100644 index 00000000000..fa43b6c14ef --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-2.xml @@ -0,0 +1,33 @@ + + + + + + + + yarn.resource-types + vcores,resource1 + + + + yarn.resource-types.resource1.calculator-units + G + + + + yarn.resource-types.vcores.units + Az + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-3.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-3.xml new file mode 100644 index 00000000000..539d657692e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-3.xml @@ -0,0 +1,29 @@ + + + + + + + + yarn.resource-types + resource1,resource1 + + + + yarn.resource-types.resource1.calculator-units + A + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-4.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-4.xml new file mode 100644 index 00000000000..c8eb7662097 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/resource-types/resource-types-error-4.xml @@ -0,0 +1,24 @@ + + + + + + + + yarn.resource-types + memory,resource1 + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java index 9240ed872e0..0b57717c29f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java @@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities; @@ -338,9 +339,20 @@ private static ApplicationReportExt convertToApplicationReport( ApplicationMetricsConstants.APP_MEM_PREEMPT_METRICS); long preemptedVcoreSeconds = parseLong(entityInfo, ApplicationMetricsConstants.APP_CPU_PREEMPT_METRICS); - appResources = ApplicationResourceUsageReport.newInstance(0, 0, null, - null, null, memorySeconds, vcoreSeconds, 0, 0, - preemptedMemorySeconds, preemptedVcoreSeconds); + Map resourceSecondsMap = new HashMap<>(); + Map preemptedResoureSecondsMap = new HashMap<>(); + resourceSecondsMap + .put(ResourceInformation.MEMORY_MB.getName(), memorySeconds); + resourceSecondsMap + .put(ResourceInformation.VCORES.getName(), vcoreSeconds); + preemptedResoureSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), + preemptedMemorySeconds); + preemptedResoureSecondsMap + .put(ResourceInformation.VCORES.getName(), preemptedVcoreSeconds); + + appResources = ApplicationResourceUsageReport + .newInstance(0, 0, null, null, null, resourceSecondsMap, 0, 0, + preemptedResoureSecondsMap); } if (entityInfo.containsKey(ApplicationMetricsConstants.APP_TAGS_INFO)) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/webapp/TestAHSWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/webapp/TestAHSWebServices.java index dc692a59632..bca5d5c45d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/webapp/TestAHSWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/webapp/TestAHSWebServices.java @@ -53,7 +53,7 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType; -import org.apache.hadoop.yarn.logaggregation.PerContainerLogFileInfo; +import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo; import org.apache.hadoop.yarn.logaggregation.TestContainerLogsUtils; import 
org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryClientService; import org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryManagerOnTimelineStore; @@ -851,7 +851,7 @@ public void testContainerLogsMetaForRunningApps() throws Exception { for (ContainerLogsInfo logInfo : responseText) { if(logInfo.getLogType().equals( ContainerLogAggregationType.AGGREGATED.toString())) { - List logMeta = logInfo + List logMeta = logInfo .getContainerLogsInfo(); assertTrue(logMeta.size() == 1); assertEquals(logMeta.get(0).getFileName(), fileName); @@ -879,7 +879,7 @@ public void testContainerLogsMetaForRunningApps() throws Exception { for (ContainerLogsInfo logInfo : responseText) { if(logInfo.getLogType().equals( ContainerLogAggregationType.AGGREGATED.toString())) { - List logMeta = logInfo + List logMeta = logInfo .getContainerLogsInfo(); assertTrue(logMeta.size() == 1); assertEquals(logMeta.get(0).getFileName(), fileName); @@ -917,7 +917,7 @@ public void testContainerLogsMetaForFinishedApps() throws Exception { assertTrue(responseText.size() == 1); assertEquals(responseText.get(0).getLogType(), ContainerLogAggregationType.AGGREGATED.toString()); - List logMeta = responseText.get(0) + List logMeta = responseText.get(0) .getContainerLogsInfo(); assertTrue(logMeta.size() == 1); assertEquals(logMeta.get(0).getFileName(), fileName); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NMContainerStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NMContainerStatus.java index ed950ce9284..180add80616 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NMContainerStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NMContainerStatus.java @@ -21,6 +21,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; @@ -40,13 +41,14 @@ public static NMContainerStatus newInstance(ContainerId containerId, long creationTime) { return newInstance(containerId, version, containerState, allocatedResource, diagnostics, containerExitStatus, priority, creationTime, - CommonNodeLabelsManager.NO_LABEL); + CommonNodeLabelsManager.NO_LABEL, ExecutionType.GUARANTEED); } public static NMContainerStatus newInstance(ContainerId containerId, int version, ContainerState containerState, Resource allocatedResource, String diagnostics, int containerExitStatus, Priority priority, - long creationTime, String nodeLabelExpression) { + long creationTime, String nodeLabelExpression, + ExecutionType executionType) { NMContainerStatus status = Records.newRecord(NMContainerStatus.class); status.setContainerId(containerId); @@ -58,6 +60,7 @@ public static NMContainerStatus newInstance(ContainerId containerId, status.setPriority(priority); status.setCreationTime(creationTime); status.setNodeLabelExpression(nodeLabelExpression); + 
status.setExecutionType(executionType); return status; } @@ -134,4 +137,14 @@ public int getVersion() { public void setVersion(int version) { } + + /** + * Get the ExecutionType of the container. + * @return ExecutionType of the container + */ + public ExecutionType getExecutionType() { + return ExecutionType.GUARANTEED; + } + + public void setExecutionType(ExecutionType executionType) { } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java index 2ebca570853..05a9c721e15 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java @@ -113,4 +113,9 @@ public abstract void addAllContainersToUpdate( public abstract void setContainerQueuingLimit( ContainerQueuingLimit containerQueuingLimit); + + public abstract List getContainersToDecrease(); + + public abstract void addAllContainersToDecrease( + Collection containersToDecrease); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NMContainerStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NMContainerStatusPBImpl.java index 2380391e0ee..38df5f6766d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NMContainerStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NMContainerStatusPBImpl.java @@ -20,6 +20,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl; @@ -27,6 +28,7 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; +import org.apache.hadoop.yarn.proto.YarnProtos; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; @@ -249,6 +251,25 @@ public void setNodeLabelExpression(String nodeLabelExpression) { builder.setNodeLabelExpression(nodeLabelExpression); } + @Override + public synchronized ExecutionType getExecutionType() { + NMContainerStatusProtoOrBuilder p = viaProto ? 
proto : builder; + if (!p.hasExecutionType()) { + return ExecutionType.GUARANTEED; + } + return convertFromProtoFormat(p.getExecutionType()); + } + + @Override + public synchronized void setExecutionType(ExecutionType executionType) { + maybeInitBuilder(); + if (executionType == null) { + builder.clearExecutionType(); + return; + } + builder.setExecutionType(convertToProtoFormat(executionType)); + } + private void mergeLocalToBuilder() { if (this.containerId != null && !((ContainerIdPBImpl) containerId).getProto().equals( @@ -313,4 +334,13 @@ private PriorityPBImpl convertFromProtoFormat(PriorityProto p) { private PriorityProto convertToProtoFormat(Priority t) { return ((PriorityPBImpl)t).getProto(); } + + private ExecutionType convertFromProtoFormat( + YarnProtos.ExecutionTypeProto e) { + return ProtoUtils.convertFromProtoFormat(e); + } + + private YarnProtos.ExecutionTypeProto convertToProtoFormat(ExecutionType e) { + return ProtoUtils.convertToProtoFormat(e); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java index 11f5f61416f..bbd12942191 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java @@ -80,6 +80,8 @@ public class NodeHeartbeatResponsePBImpl extends NodeHeartbeatResponse { private MasterKey nmTokenMasterKey = null; private ContainerQueuingLimit containerQueuingLimit = null; private List containersToUpdate = null; + // NOTE: This is required for backward compatibility. + private List containersToDecrease = null; private List containersToSignal = null; public NodeHeartbeatResponsePBImpl() { @@ -126,6 +128,9 @@ private void mergeLocalToBuilder() { if (this.containersToUpdate != null) { addContainersToUpdateToProto(); } + if (this.containersToDecrease != null) { + addContainersToDecreaseToProto(); + } if (this.containersToSignal != null) { addContainersToSignalToProto(); } @@ -572,6 +577,66 @@ public void remove() { builder.addAllContainersToUpdate(iterable); } + private void initContainersToDecrease() { + if (this.containersToDecrease != null) { + return; + } + NodeHeartbeatResponseProtoOrBuilder p = viaProto ? 
proto : builder; + List list = p.getContainersToDecreaseList(); + this.containersToDecrease = new ArrayList<>(); + + for (ContainerProto c : list) { + this.containersToDecrease.add(convertFromProtoFormat(c)); + } + } + + @Override + public List getContainersToDecrease() { + initContainersToDecrease(); + return this.containersToDecrease; + } + + @Override + public void addAllContainersToDecrease( + final Collection containersToDecrease) { + if (containersToDecrease == null) { + return; + } + initContainersToDecrease(); + this.containersToDecrease.addAll(containersToDecrease); + } + + private void addContainersToDecreaseToProto() { + maybeInitBuilder(); + builder.clearContainersToDecrease(); + if (this.containersToDecrease == null) { + return; + } + + Iterable iterable = new + Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + private Iterator iter = containersToDecrease.iterator(); + @Override + public boolean hasNext() { + return iter.hasNext(); + } + @Override + public ContainerProto next() { + return convertToProtoFormat(iter.next()); + } + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + builder.addAllContainersToDecrease(iterable); + } + @Override public Map getSystemCredentialsForApps() { if (this.systemCredentials != null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java index e7f47af2647..3b37abdee03 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java @@ -65,8 +65,6 @@ import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.api.records.YarnApplicationState; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; @@ -447,12 +445,12 @@ public static ApplicationSubmissionContext newApplicationSubmissionContext( queue, priority, amContainer, isUnmanagedAM, cancelTokensWhenComplete, maxAppAttempts, resource, null); } - + public static ApplicationResourceUsageReport newApplicationResourceUsageReport( int numUsedContainers, int numReservedContainers, Resource usedResources, - Resource reservedResources, Resource neededResources, long memorySeconds, - long vcoreSeconds, long preemptedMemorySeconds, - long preemptedVcoreSeconds) { + Resource reservedResources, Resource neededResources, + Map resourceSecondsMap, + Map preemptedResourceSecondsMap) { ApplicationResourceUsageReport report = recordFactory.newRecordInstance(ApplicationResourceUsageReport.class); report.setNumUsedContainers(numUsedContainers); @@ -460,10 +458,8 @@ public static ApplicationResourceUsageReport newApplicationResourceUsageReport( report.setUsedResources(usedResources); report.setReservedResources(reservedResources); report.setNeededResources(neededResources); - report.setMemorySeconds(memorySeconds); - 
report.setVcoreSeconds(vcoreSeconds); - report.setPreemptedMemorySeconds(preemptedMemorySeconds); - report.setPreemptedVcoreSeconds(preemptedVcoreSeconds); + report.setResourceSecondsMap(resourceSecondsMap); + report.setPreemptedResourceSecondsMap(preemptedResourceSecondsMap); return report; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppAttemptBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppAttemptBlock.java index 87c554d4bec..cad14b67880 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppAttemptBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppAttemptBlock.java @@ -19,8 +19,11 @@ import static org.apache.hadoop.yarn.util.StringHelper.join; import static org.apache.hadoop.yarn.webapp.YarnWebParams.APPLICATION_ATTEMPT_ID; + +import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.util.Collection; +import java.util.List; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.logging.Log; @@ -34,6 +37,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; @@ -77,15 +81,13 @@ protected void render(Block html) { GetApplicationAttemptReportRequest.newInstance(appAttemptId); if (callerUGI == null) { appAttemptReport = - appBaseProt.getApplicationAttemptReport(request) - .getApplicationAttemptReport(); + getApplicationAttemptReport(request); } else { appAttemptReport = callerUGI.doAs( new PrivilegedExceptionAction () { @Override public ApplicationAttemptReport run() throws Exception { - return appBaseProt.getApplicationAttemptReport(request) - .getApplicationAttemptReport(); + return getApplicationAttemptReport(request); } }); } @@ -108,13 +110,13 @@ public ApplicationAttemptReport run() throws Exception { final GetContainersRequest request = GetContainersRequest.newInstance(appAttemptId); if (callerUGI == null) { - containers = appBaseProt.getContainers(request).getContainerList(); + containers = getContainers(request); } else { containers = callerUGI.doAs( new PrivilegedExceptionAction> () { @Override public Collection run() throws Exception { - return appBaseProt.getContainers(request).getContainerList(); + return getContainers(request); } }); } @@ -190,6 +192,18 @@ public Collection run() throws Exception { tbody.__().__(); } + protected List getContainers( + final GetContainersRequest request) throws YarnException, IOException { + return appBaseProt.getContainers(request).getContainerList(); + } + + protected ApplicationAttemptReport getApplicationAttemptReport( + final GetApplicationAttemptReportRequest request) + throws YarnException, IOException { + return appBaseProt.getApplicationAttemptReport(request) + .getApplicationAttemptReport(); + } + protected void generateOverview(ApplicationAttemptReport appAttemptReport, Collection containers, AppAttemptInfo appAttempt, String node) { diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppBlock.java index 95bc0aa614d..08e75ac8099 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppBlock.java @@ -22,8 +22,10 @@ import static org.apache.hadoop.yarn.webapp.YarnWebParams.APPLICATION_ID; import static org.apache.hadoop.yarn.webapp.YarnWebParams.WEB_UI_TYPE; +import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.util.Collection; +import java.util.List; import java.util.Map; import org.apache.commons.lang.StringEscapeUtils; @@ -49,6 +51,7 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.ContainerNotFoundException; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo; import org.apache.hadoop.yarn.server.webapp.dao.AppInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; @@ -114,8 +117,7 @@ protected void render(Block html) { new PrivilegedExceptionAction () { @Override public ApplicationReport run() throws Exception { - return appBaseProt.getApplicationReport(request) - .getApplicationReport(); + return getApplicationReport(request); } }); } @@ -190,8 +192,7 @@ public ApplicationReport run() throws Exception { ApplicationAttemptReport>>() { @Override public Collection run() throws Exception { - return appBaseProt.getApplicationAttempts(request) - .getApplicationAttemptList(); + return getApplicationAttemptsReport(request); } }); } catch (Exception e) { @@ -301,7 +302,7 @@ protected void generateApplicationTable(Block html, appAttemptReport.getAMContainerId()); if (callerUGI == null) { containerReport = - appBaseProt.getContainerReport(request).getContainerReport(); + getContainerReport(request); } else { containerReport = callerUGI.doAs( new PrivilegedExceptionAction() { @@ -310,8 +311,7 @@ public ContainerReport run() throws Exception { ContainerReport report = null; if (request.getContainerId() != null) { try { - report = appBaseProt.getContainerReport(request) - .getContainerReport(); + report = getContainerReport(request); } catch (ContainerNotFoundException ex) { LOG.warn(ex.getMessage()); } @@ -364,6 +364,26 @@ public ContainerReport run() throws Exception { tbody.__().__(); } + protected ContainerReport getContainerReport( + final GetContainerReportRequest request) + throws YarnException, IOException { + return appBaseProt.getContainerReport(request).getContainerReport(); + } + + protected List getApplicationAttemptsReport( + final GetApplicationAttemptsRequest request) + throws YarnException, IOException { + return appBaseProt.getApplicationAttempts(request) + .getApplicationAttemptList(); + } + + protected ApplicationReport getApplicationReport( + final GetApplicationReportRequest request) + throws YarnException, IOException { + return appBaseProt.getApplicationReport(request).getApplicationReport(); + } + + private String clarifyAppState(YarnApplicationState state) { String ret = state.toString(); switch (state) { diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppsBlock.java index d836e641177..e3a47de0ec5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppsBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppsBlock.java @@ -30,6 +30,7 @@ import java.security.PrivilegedExceptionAction; import java.util.Collection; import java.util.EnumSet; +import java.util.List; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.math.LongRange; @@ -110,20 +111,24 @@ protected void fetchData() throws YarnException, IOException, new LongRange(appStartedTimeBegain, appStartedTimeEnd)); if (callerUGI == null) { - appReports = appBaseProt.getApplications(request).getApplicationList(); + appReports = getApplicationReport(request); } else { appReports = callerUGI .doAs(new PrivilegedExceptionAction>() { @Override public Collection run() throws Exception { - return appBaseProt.getApplications(request) - .getApplicationList(); + return getApplicationReport(request); } }); } } + protected List getApplicationReport( + final GetApplicationsRequest request) throws YarnException, IOException { + return appBaseProt.getApplications(request).getApplicationList(); + } + @Override public void render(Block html) { setTitle("Applications"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/ContainerBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/ContainerBlock.java index fa35a3d5273..b8c0bed87d3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/ContainerBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/ContainerBlock.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.yarn.util.StringHelper.join; import static org.apache.hadoop.yarn.webapp.YarnWebParams.CONTAINER_ID; +import java.io.IOException; import java.security.PrivilegedExceptionAction; import org.apache.commons.logging.Log; @@ -30,6 +31,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerReport; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; @@ -70,15 +72,13 @@ protected void render(Block html) { final GetContainerReportRequest request = GetContainerReportRequest.newInstance(containerId); if (callerUGI == null) { - containerReport = appBaseProt.getContainerReport(request) - .getContainerReport(); + containerReport = getContainerReport(request); } else { containerReport = callerUGI.doAs( new PrivilegedExceptionAction () { @Override public ContainerReport run() throws Exception { - return appBaseProt.getContainerReport(request) - .getContainerReport(); + return 
getContainerReport(request); } }); } @@ -126,4 +126,10 @@ public ContainerReport run() throws Exception { html.__(InfoBlock.class); } + + protected ContainerReport getContainerReport( + final GetContainerReportRequest request) + throws YarnException, IOException { + return appBaseProt.getContainerReport(request).getContainerReport(); + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebServices.java index 904c5118f44..6bb6c984544 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebServices.java @@ -18,11 +18,13 @@ package org.apache.hadoop.yarn.server.webapp; +import java.io.IOException; import java.lang.reflect.UndeclaredThrowableException; import java.security.PrivilegedExceptionAction; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; +import java.util.List; import java.util.Set; import javax.servlet.http.HttpServletRequest; @@ -51,13 +53,13 @@ import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.ContainerNotFoundException; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo; import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptsInfo; import org.apache.hadoop.yarn.server.webapp.dao.AppInfo; import org.apache.hadoop.yarn.server.webapp.dao.AppsInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo; import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.webapp.BadRequestException; import org.apache.hadoop.yarn.webapp.ForbiddenException; import org.apache.hadoop.yarn.webapp.NotFoundException; @@ -154,13 +156,13 @@ public AppsInfo getApps(HttpServletRequest req, HttpServletResponse res, if (callerUGI == null) { // TODO: the request should take the params like what RMWebServices does // in YARN-1819. 
- appReports = appBaseProt.getApplications(request).getApplicationList(); + appReports = getApplicationsReport(request); } else { appReports = callerUGI.doAs( new PrivilegedExceptionAction> () { @Override public Collection run() throws Exception { - return appBaseProt.getApplications(request).getApplicationList(); + return getApplicationsReport(request); } }); } @@ -220,7 +222,7 @@ public AppInfo getApp(HttpServletRequest req, HttpServletResponse res, if (callerUGI == null) { GetApplicationReportRequest request = GetApplicationReportRequest.newInstance(id); - app = appBaseProt.getApplicationReport(request).getApplicationReport(); + app = getApplicationReport(request); } else { app = callerUGI.doAs( new PrivilegedExceptionAction () { @@ -228,7 +230,7 @@ public AppInfo getApp(HttpServletRequest req, HttpServletResponse res, public ApplicationReport run() throws Exception { GetApplicationReportRequest request = GetApplicationReportRequest.newInstance(id); - return appBaseProt.getApplicationReport(request).getApplicationReport(); + return getApplicationReport(request); } }); } @@ -251,8 +253,7 @@ public AppAttemptsInfo getAppAttempts(HttpServletRequest req, GetApplicationAttemptsRequest request = GetApplicationAttemptsRequest.newInstance(id); appAttemptReports = - appBaseProt.getApplicationAttempts(request) - .getApplicationAttemptList(); + getApplicationAttemptsReport(request); } else { appAttemptReports = callerUGI.doAs( new PrivilegedExceptionAction> () { @@ -260,8 +261,7 @@ public AppAttemptsInfo getAppAttempts(HttpServletRequest req, public Collection run() throws Exception { GetApplicationAttemptsRequest request = GetApplicationAttemptsRequest.newInstance(id); - return appBaseProt.getApplicationAttempts(request) - .getApplicationAttemptList(); + return getApplicationAttemptsReport(request); } }); } @@ -292,8 +292,7 @@ public AppAttemptInfo getAppAttempt(HttpServletRequest req, GetApplicationAttemptReportRequest request = GetApplicationAttemptReportRequest.newInstance(aaid); appAttempt = - appBaseProt.getApplicationAttemptReport(request) - .getApplicationAttemptReport(); + getApplicationAttemptReport(request); } else { appAttempt = callerUGI.doAs( new PrivilegedExceptionAction () { @@ -301,8 +300,7 @@ public AppAttemptInfo getAppAttempt(HttpServletRequest req, public ApplicationAttemptReport run() throws Exception { GetApplicationAttemptReportRequest request = GetApplicationAttemptReportRequest.newInstance(aaid); - return appBaseProt.getApplicationAttemptReport(request) - .getApplicationAttemptReport(); + return getApplicationAttemptReport(request); } }); } @@ -327,14 +325,14 @@ public ContainersInfo getContainers(HttpServletRequest req, if (callerUGI == null) { GetContainersRequest request = GetContainersRequest.newInstance(aaid); containerReports = - appBaseProt.getContainers(request).getContainerList(); + getContainersReport(request); } else { containerReports = callerUGI.doAs( new PrivilegedExceptionAction> () { @Override public Collection run() throws Exception { GetContainersRequest request = GetContainersRequest.newInstance(aaid); - return appBaseProt.getContainers(request).getContainerList(); + return getContainersReport(request); } }); } @@ -366,7 +364,7 @@ public ContainerInfo getContainer(HttpServletRequest req, GetContainerReportRequest request = GetContainerReportRequest.newInstance(cid); container = - appBaseProt.getContainerReport(request).getContainerReport(); + getContainerReport(request); } else { container = callerUGI.doAs( new PrivilegedExceptionAction () { @@ -374,7 
+372,7 @@ public ContainerInfo getContainer(HttpServletRequest req, public ContainerReport run() throws Exception { GetContainerReportRequest request = GetContainerReportRequest.newInstance(cid); - return appBaseProt.getContainerReport(request).getContainerReport(); + return getContainerReport(request); } }); } @@ -516,4 +514,36 @@ private static void rewrapAndThrowThrowable(Throwable t) { } } + protected ApplicationReport getApplicationReport( + GetApplicationReportRequest request) throws YarnException, IOException { + return appBaseProt.getApplicationReport(request).getApplicationReport(); + } + + protected List getApplicationsReport( + final GetApplicationsRequest request) throws YarnException, IOException { + return appBaseProt.getApplications(request).getApplicationList(); + } + + protected ApplicationAttemptReport getApplicationAttemptReport( + GetApplicationAttemptReportRequest request) + throws YarnException, IOException { + return appBaseProt.getApplicationAttemptReport(request) + .getApplicationAttemptReport(); + } + + protected List getApplicationAttemptsReport( + GetApplicationAttemptsRequest request) throws YarnException, IOException { + return appBaseProt.getApplicationAttempts(request) + .getApplicationAttemptList(); + } + + protected ContainerReport getContainerReport( + GetContainerReportRequest request) throws YarnException, IOException { + return appBaseProt.getContainerReport(request).getContainerReport(); + } + + protected List getContainersReport( + GetContainersRequest request) throws YarnException, IOException { + return appBaseProt.getContainers(request).getContainerList(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/ContainerLogsInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/ContainerLogsInfo.java index bc3ab393bc5..1bb0408d944 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/ContainerLogsInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/ContainerLogsInfo.java @@ -27,14 +27,14 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.logaggregation.ContainerLogMeta; import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType; -import org.apache.hadoop.yarn.logaggregation.PerContainerLogFileInfo; +import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo; /** * {@code ContainerLogsInfo} includes the log meta-data of containers. *

<p>
 * The container log meta-data includes details such as:
 * <ul>
- *   <li>A list of {@link PerContainerLogFileInfo}.</li>
+ *   <li>A list of {@link ContainerLogFileInfo}.</li>
 *   <li>The container Id.</li>
 *   <li>The NodeManager Id.</li>
 *   <li>The logType: could be local or aggregated</li>
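Aside for reviewers, not part of the patch: a minimal sketch of the renamed DAO in use, built only from what is visible in this diff (the ContainerLogsInfo(ContainerLogMeta, ContainerLogAggregationType) constructor, getContainerLogsInfo(), and ContainerLogFileInfo.getFileName() as asserted in TestAHSWebServices). The helper class and method names below are invented for illustration.

import java.util.List;

import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType;
import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo;
import org.apache.hadoop.yarn.logaggregation.ContainerLogMeta;
import org.apache.hadoop.yarn.server.webapp.dao.ContainerLogsInfo;

/** Hypothetical helper for illustration only; not added by this patch. */
public final class ContainerLogsInfoSketch {

  private ContainerLogsInfoSketch() {
  }

  /**
   * Wraps aggregated log metadata in the web DAO and returns the per-file
   * entries under the renamed ContainerLogFileInfo type.
   */
  public static List<ContainerLogFileInfo> listAggregatedFiles(
      ContainerLogMeta logMeta) throws YarnException {
    ContainerLogsInfo logsInfo =
        new ContainerLogsInfo(logMeta, ContainerLogAggregationType.AGGREGATED);
    List<ContainerLogFileInfo> files = logsInfo.getContainerLogsInfo();
    for (ContainerLogFileInfo fileInfo : files) {
      // Same accessor the updated TestAHSWebServices assertions rely on.
      System.out.println(fileInfo.getFileName());
    }
    return files;
  }
}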
  • @@ -46,7 +46,7 @@ public class ContainerLogsInfo { @XmlElement(name = "containerLogInfo") - protected List containerLogsInfo; + protected List containerLogsInfo; @XmlElement(name = "logAggregationType") protected String logType; @@ -62,14 +62,14 @@ public ContainerLogsInfo() {} public ContainerLogsInfo(ContainerLogMeta logMeta, ContainerLogAggregationType logType) throws YarnException { - this.containerLogsInfo = new ArrayList( + this.containerLogsInfo = new ArrayList( logMeta.getContainerLogMeta()); this.logType = logType.toString(); this.containerId = logMeta.getContainerId(); this.nodeId = logMeta.getNodeId(); } - public List getContainerLogsInfo() { + public List getContainerLogsInfo() { return this.containerLogsInfo; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto index c2ba6772265..e889cdec82c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto @@ -174,6 +174,7 @@ message NMContainerStatusProto { optional int64 creation_time = 7; optional string nodeLabelExpression = 8; optional int32 version = 9; + optional ExecutionTypeProto executionType = 10 [default = GUARANTEED]; } message SCMUploaderNotifyRequestProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java index e33d7e19774..769296b74b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/MockResourceManagerFacade.java @@ -41,6 +41,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -73,6 +77,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import 
org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; @@ -852,4 +858,22 @@ public String[] getGroupsForUser(String user) throws IOException { return new String[0]; } + + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException { + return null; + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + return null; + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + return null; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java index b6fb4ecf47d..da50d7a3a2c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java @@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerPrepareContext; import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext; @@ -663,7 +664,8 @@ public void activateContainer(ContainerId containerId, Path pidFilePath) { } // LinuxContainerExecutor overrides this method and behaves differently. - public String[] getIpAndHost(Container container) { + public String[] getIpAndHost(Container container) + throws ContainerExecutionException { return getLocalIpAndHost(container); } @@ -697,6 +699,28 @@ public void deactivateContainer(ContainerId containerId) { } } + /** + * Pause the container. The default implementation is to raise a kill event. + * Specific executor implementations can override this behavior. + * @param container + * the Container + */ + public void pauseContainer(Container container) { + LOG.warn(container.getContainerId() + " doesn't support pausing."); + throw new UnsupportedOperationException(); + } + + /** + * Resume the container from pause state. The default implementation ignores + * this event. Specific implementations can override this behavior. + * @param container + * the Container + */ + public void resumeContainer(Container container) { + LOG.warn(container.getContainerId() + " doesn't support resume."); + throw new UnsupportedOperationException(); + } + /** * Get the process-identifier for the container. 
* diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerStateTransitionListener.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerStateTransitionListener.java new file mode 100644 index 00000000000..24cdb1f7abb --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerStateTransitionListener.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; +import org.apache.hadoop.yarn.state.StateTransitionListener; + +/** + * Interface to be used by external cluster operators to implement a + * State Transition listener that is notified before and after a container + * state transition. + * NOTE: The pre and post transition callbacks will be made in the synchronized + * block as the call to the instrumented transition - Serially, in the + * order: preTransition, transition and postTransition. The implementor + * must ensure that the callbacks return in a timely manner to avoid + * blocking the state-machine. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface ContainerStateTransitionListener extends + StateTransitionListener { + + /** + * Init method which will be invoked by the Node Manager to inject the + * NM {@link Context}. + * @param context NM Context. + */ + void init(Context context); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index 00bd0efcb17..a2d00a4cc24 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -120,4 +120,6 @@ public interface Context { NMTimelinePublisher getNMTimelinePublisher(); ContainerExecutor getContainerExecutor(); + + ContainerStateTransitionListener getContainerStateTransitionListener(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index dc686801424..2971f83d3dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -625,7 +625,8 @@ private ContainerRuntimeContext buildContainerRuntimeContext( } @Override - public String[] getIpAndHost(Container container) { + public String[] getIpAndHost(Container container) + throws ContainerExecutionException { return 
linuxContainerRuntime.getIpAndHost(container); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 3e919c5cdad..a97b3f2c2aa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -20,12 +20,18 @@ import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; +import org.apache.hadoop.yarn.state.MultiStateTransitionListener; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -130,6 +136,17 @@ public int getExitCode() { private boolean rmWorkPreservingRestartEnabled; private boolean shouldExitOnShutdownEvent = false; + /** + * Default Container State transition listener. + */ + public static class DefaultContainerStateListener extends + MultiStateTransitionListener + + implements ContainerStateTransitionListener { + @Override + public void init(Context context) {} + } + public NodeManager() { super(NodeManager.class.getName()); } @@ -219,8 +236,22 @@ protected NMContext createNMContext( NMTokenSecretManagerInNM nmTokenSecretManager, NMStateStoreService stateStore, boolean isDistSchedulerEnabled, Configuration conf) { - return new NMContext(containerTokenSecretManager, nmTokenSecretManager, - dirsHandler, aclsManager, stateStore, isDistSchedulerEnabled, conf); + List listeners = + conf.getInstances( + YarnConfiguration.NM_CONTAINER_STATE_TRANSITION_LISTENERS, + ContainerStateTransitionListener.class); + NMContext nmContext = new NMContext(containerTokenSecretManager, + nmTokenSecretManager, dirsHandler, aclsManager, stateStore, + isDistSchedulerEnabled, conf); + DefaultContainerStateListener defaultListener = + new DefaultContainerStateListener(); + nmContext.setContainerStateTransitionListener(defaultListener); + defaultListener.init(nmContext); + for (ContainerStateTransitionListener listener : listeners) { + listener.init(nmContext); + defaultListener.addListener(listener); + } + return nmContext; } protected void doSecureLogin() throws IOException { @@ -563,6 +594,8 @@ public static class NMContext implements Context { private NMTimelinePublisher nmTimelinePublisher; + private ContainerStateTransitionListener containerStateTransitionListener; + public NMContext(NMContainerTokenSecretManager containerTokenSecretManager, NMTokenSecretManagerInNM nmTokenSecretManager, LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager, @@ -752,6 +785,17 @@ public ContainerExecutor getContainerExecutor() { public void setContainerExecutor(ContainerExecutor executor) { this.executor = 
executor; } + + @Override + public ContainerStateTransitionListener + getContainerStateTransitionListener() { + return this.containerStateTransitionListener; + } + + public void setContainerStateTransitionListener( + ContainerStateTransitionListener transitionListener) { + this.containerStateTransitionListener = transitionListener; + } } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 35b7cb0e5f1..3efe0bc1e18 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -174,27 +174,28 @@ public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher, @Override protected void serviceInit(Configuration conf) throws Exception { - int memoryMb = NodeManagerHardwareUtils.getContainerMemoryMB(conf); + this.totalResource = NodeManagerHardwareUtils.getNodeResources(conf); + long memoryMb = totalResource.getMemorySize(); float vMemToPMem = conf.getFloat( YarnConfiguration.NM_VMEM_PMEM_RATIO, YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); - int virtualMemoryMb = (int)Math.ceil(memoryMb * vMemToPMem); + long virtualMemoryMb = (long)Math.ceil(memoryMb * vMemToPMem); - int virtualCores = NodeManagerHardwareUtils.getVCores(conf); + int virtualCores = totalResource.getVirtualCores(); LOG.info("Nodemanager resources: memory set to " + memoryMb + "MB."); LOG.info("Nodemanager resources: vcores set to " + virtualCores + "."); + LOG.info("Nodemanager resources: " + totalResource); - this.totalResource = Resource.newInstance(memoryMb, virtualCores); metrics.addResource(totalResource); // Get actual node physical resources - int physicalMemoryMb = memoryMb; + long physicalMemoryMb = memoryMb; int physicalCores = virtualCores; ResourceCalculatorPlugin rcp = ResourceCalculatorPlugin.getNodeResourceMonitorPlugin(conf); if (rcp != null) { - physicalMemoryMb = (int) (rcp.getPhysicalMemorySize() / (1024 * 1024)); + physicalMemoryMb = rcp.getPhysicalMemorySize() / (1024 * 1024); physicalCores = rcp.getNumProcessors(); } this.physicalResource = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index ac9fbb7070c..86f2554af7d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -96,4 +96,13 @@ public interface Container extends EventHandler { void sendKillEvent(int exitStatus, String description); boolean isRecovering(); + + /** + * Get assigned resource mappings to the container. 
+ * + * @return Resource Mappings of the container + */ + ResourceMappings getResourceMappings(); + + void sendPauseEvent(String description); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java index afea0e6cbd0..147543567d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java @@ -27,6 +27,8 @@ public enum ContainerEventType { CONTAINER_DONE, REINITIALIZE_CONTAINER, ROLLBACK_REINIT, + PAUSE_CONTAINER, + RESUME_CONTAINER, // DownloadManager CONTAINER_INITED, @@ -38,5 +40,7 @@ public enum ContainerEventType { CONTAINER_LAUNCHED, CONTAINER_EXITED_WITH_SUCCESS, CONTAINER_EXITED_WITH_FAILURE, - CONTAINER_KILLED_ON_REQUEST + CONTAINER_KILLED_ON_REQUEST, + CONTAINER_PAUSED, + CONTAINER_RESUMED } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 772b6e7660f..836e70e68f0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -185,6 +185,7 @@ private ReInitializationContext createContextForRollback() { private boolean recoveredAsKilled = false; private Context context; private ResourceSet resourceSet; + private ResourceMappings resourceMappings; public ContainerImpl(Configuration conf, Dispatcher dispatcher, ContainerLaunchContext launchContext, Credentials creds, @@ -239,9 +240,11 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, this.containerRetryContext = configureRetryContext( conf, launchContext, this.containerId); this.remainingRetryAttempts = this.containerRetryContext.getMaxRetries(); - stateMachine = stateMachineFactory.make(this); + stateMachine = stateMachineFactory.make(this, ContainerState.NEW, + context.getContainerStateTransitionListener()); this.context = context; this.resourceSet = new ResourceSet(); + this.resourceMappings = new ResourceMappings(); } private static ContainerRetryContext configureRetryContext( @@ -282,6 +285,7 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, this.remainingRetryAttempts = rcs.getRemainingRetryAttempts(); this.workDir = rcs.getWorkDir(); this.logDir = rcs.getLogDir(); + this.resourceMappings = rcs.getResourceMappings(); } private static final ContainerDiagnosticsUpdateTransition UPDATE_DIAGNOSTICS_TRANSITION = @@ -304,6 +308,8 @@ ContainerEventType.INIT_CONTAINER, new 
RequestResourcesTransition()) UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.NEW, ContainerState.DONE, ContainerEventType.KILL_CONTAINER, new KillOnNewTransition()) + .addTransition(ContainerState.NEW, ContainerState.DONE, + ContainerEventType.PAUSE_CONTAINER, new KillOnPauseTransition()) // From LOCALIZING State .addTransition(ContainerState.LOCALIZING, @@ -319,6 +325,8 @@ ContainerEventType.RESOURCE_LOCALIZED, new LocalizedTransition()) .addTransition(ContainerState.LOCALIZING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillBeforeRunningTransition()) + .addTransition(ContainerState.LOCALIZING, ContainerState.KILLING, + ContainerEventType.PAUSE_CONTAINER, new KillOnPauseTransition()) // From LOCALIZATION_FAILED State .addTransition(ContainerState.LOCALIZATION_FAILED, @@ -332,7 +340,8 @@ ContainerEventType.RESOURCE_LOCALIZED, new LocalizedTransition()) // container not launched so kill is a no-op .addTransition(ContainerState.LOCALIZATION_FAILED, ContainerState.LOCALIZATION_FAILED, - ContainerEventType.KILL_CONTAINER) + EnumSet.of(ContainerEventType.KILL_CONTAINER, + ContainerEventType.PAUSE_CONTAINER)) // container cleanup triggers a release of all resources // regardless of whether they were localized or not // LocalizedResource handles release event in all states @@ -388,6 +397,76 @@ ContainerEventType.KILL_CONTAINER, new KillTransition()) ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, new KilledExternallyTransition()) + .addTransition(ContainerState.RUNNING, ContainerState.PAUSING, + ContainerEventType.PAUSE_CONTAINER, new PauseContainerTransition()) + + // From PAUSING State + .addTransition(ContainerState.PAUSING, ContainerState.KILLING, + ContainerEventType.KILL_CONTAINER, new KillTransition()) + .addTransition(ContainerState.PAUSING, ContainerState.PAUSING, + ContainerEventType.UPDATE_DIAGNOSTICS_MSG, + UPDATE_DIAGNOSTICS_TRANSITION) + .addTransition(ContainerState.PAUSING, ContainerState.PAUSED, + ContainerEventType.CONTAINER_PAUSED, new PausedContainerTransition()) + // In case something goes wrong then container will exit from the + // PAUSING state + .addTransition(ContainerState.PAUSING, + ContainerState.EXITED_WITH_SUCCESS, + ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS) + .addTransition(ContainerState.PAUSING, + ContainerState.EXITED_WITH_FAILURE, + ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, + new ExitedWithFailureTransition(true)) + .addTransition(ContainerState.PAUSING, ContainerState.EXITED_WITH_FAILURE, + ContainerEventType.CONTAINER_KILLED_ON_REQUEST, + new KilledExternallyTransition()) + + // From PAUSED State + .addTransition(ContainerState.PAUSED, ContainerState.KILLING, + ContainerEventType.KILL_CONTAINER, new KillTransition()) + .addTransition(ContainerState.PAUSED, ContainerState.PAUSED, + ContainerEventType.UPDATE_DIAGNOSTICS_MSG, + UPDATE_DIAGNOSTICS_TRANSITION) + .addTransition(ContainerState.PAUSED, ContainerState.PAUSED, + ContainerEventType.PAUSE_CONTAINER) + .addTransition(ContainerState.PAUSED, ContainerState.RESUMING, + ContainerEventType.RESUME_CONTAINER, new ResumeContainerTransition()) + // In case something goes wrong then container will exit from the + // PAUSED state + .addTransition(ContainerState.PAUSED, + ContainerState.EXITED_WITH_FAILURE, + ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, + new ExitedWithFailureTransition(true)) + .addTransition(ContainerState.PAUSED, ContainerState.EXITED_WITH_FAILURE, + ContainerEventType.CONTAINER_KILLED_ON_REQUEST, + new 
KilledExternallyTransition()) + .addTransition(ContainerState.PAUSED, + ContainerState.EXITED_WITH_SUCCESS, + ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, + new ExitedWithSuccessTransition(true)) + + // From RESUMING State + .addTransition(ContainerState.RESUMING, ContainerState.KILLING, + ContainerEventType.KILL_CONTAINER, new KillTransition()) + .addTransition(ContainerState.RESUMING, ContainerState.RUNNING, + ContainerEventType.CONTAINER_RESUMED) + .addTransition(ContainerState.RESUMING, ContainerState.RESUMING, + ContainerEventType.UPDATE_DIAGNOSTICS_MSG, + UPDATE_DIAGNOSTICS_TRANSITION) + // In case something goes wrong then container will exit from the + // RESUMING state + .addTransition(ContainerState.RESUMING, + ContainerState.EXITED_WITH_FAILURE, + ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, + new ExitedWithFailureTransition(true)) + .addTransition(ContainerState.RESUMING, + ContainerState.EXITED_WITH_FAILURE, + ContainerEventType.CONTAINER_KILLED_ON_REQUEST, + new KilledExternallyTransition()) + .addTransition(ContainerState.RESUMING, + ContainerState.EXITED_WITH_SUCCESS, + ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, + new ExitedWithSuccessTransition(true)) // From REINITIALIZING State .addTransition(ContainerState.REINITIALIZING, @@ -411,6 +490,8 @@ ContainerEventType.KILL_CONTAINER, new KillTransition()) UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.REINITIALIZING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) + .addTransition(ContainerState.REINITIALIZING, ContainerState.KILLING, + ContainerEventType.PAUSE_CONTAINER, new KillOnPauseTransition()) .addTransition(ContainerState.REINITIALIZING, ContainerState.SCHEDULED, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, @@ -428,6 +509,8 @@ ContainerEventType.CONTAINER_LAUNCHED, new LaunchTransition()) UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.RELAUNCHING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) + .addTransition(ContainerState.RELAUNCHING, ContainerState.KILLING, + ContainerEventType.PAUSE_CONTAINER, new KillOnPauseTransition()) // From CONTAINER_EXITED_WITH_SUCCESS State .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE, @@ -439,7 +522,8 @@ ContainerEventType.KILL_CONTAINER, new KillTransition()) UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.EXITED_WITH_SUCCESS, - ContainerEventType.KILL_CONTAINER) + EnumSet.of(ContainerEventType.KILL_CONTAINER, + ContainerEventType.PAUSE_CONTAINER)) // From EXITED_WITH_FAILURE State .addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.DONE, @@ -451,7 +535,8 @@ ContainerEventType.KILL_CONTAINER, new KillTransition()) UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.EXITED_WITH_FAILURE, - ContainerEventType.KILL_CONTAINER) + EnumSet.of(ContainerEventType.KILL_CONTAINER, + ContainerEventType.PAUSE_CONTAINER)) // From KILLING State. .addTransition(ContainerState.KILLING, @@ -485,7 +570,8 @@ ContainerEventType.KILL_CONTAINER, new KillTransition()) // in the container launcher .addTransition(ContainerState.KILLING, ContainerState.KILLING, - ContainerEventType.CONTAINER_LAUNCHED) + EnumSet.of(ContainerEventType.CONTAINER_LAUNCHED, + ContainerEventType.PAUSE_CONTAINER)) // From CONTAINER_CLEANEDUP_AFTER_KILL State. 
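// Illustrative sketch (editorial aside, not part of the patch): the happy-path
// lifecycle that the new PAUSING/PAUSED/RESUMING arcs above encode. The enums
// below are simplified local mirrors of ContainerState and ContainerEventType,
// used only so the sketch is self-contained; kill/failure arcs are omitted.
class PauseLifecycleSketch {
  enum State { RUNNING, PAUSING, PAUSED, RESUMING }
  enum Event { PAUSE_CONTAINER, CONTAINER_PAUSED, RESUME_CONTAINER, CONTAINER_RESUMED }

  static State step(State s, Event e) {
    if (s == State.RUNNING  && e == Event.PAUSE_CONTAINER)   return State.PAUSING;
    if (s == State.PAUSING  && e == Event.CONTAINER_PAUSED)  return State.PAUSED;
    if (s == State.PAUSED   && e == Event.RESUME_CONTAINER)  return State.RESUMING;
    if (s == State.RESUMING && e == Event.CONTAINER_RESUMED) return State.RUNNING;
    throw new IllegalStateException("No arc for " + e + " in " + s);
  }

  public static void main(String[] args) {
    State s = State.RUNNING;
    for (Event e : Event.values()) {
      s = step(s, e);
      System.out.println(e + " -> " + s);
    }
  }
}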
.addTransition(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, @@ -501,11 +587,13 @@ ContainerEventType.KILL_CONTAINER, new KillTransition()) EnumSet.of(ContainerEventType.KILL_CONTAINER, ContainerEventType.RESOURCE_FAILED, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, - ContainerEventType.CONTAINER_EXITED_WITH_FAILURE)) + ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, + ContainerEventType.PAUSE_CONTAINER)) // From DONE .addTransition(ContainerState.DONE, ContainerState.DONE, - ContainerEventType.KILL_CONTAINER) + EnumSet.of(ContainerEventType.KILL_CONTAINER, + ContainerEventType.PAUSE_CONTAINER)) .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.INIT_CONTAINER) .addTransition(ContainerState.DONE, ContainerState.DONE, @@ -528,9 +616,12 @@ ContainerEventType.KILL_CONTAINER, new KillTransition()) public org.apache.hadoop.yarn.api.records.ContainerState getCurrentState() { switch (stateMachine.getCurrentState()) { case NEW: + return org.apache.hadoop.yarn.api.records.ContainerState.NEW; case LOCALIZING: case LOCALIZATION_FAILED: case SCHEDULED: + case PAUSED: + case RESUMING: return org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED; case RUNNING: case RELAUNCHING: @@ -540,6 +631,7 @@ public org.apache.hadoop.yarn.api.records.ContainerState getCurrentState() { case KILLING: case CONTAINER_CLEANEDUP_AFTER_KILL: case CONTAINER_RESOURCES_CLEANINGUP: + case PAUSING: return org.apache.hadoop.yarn.api.records.ContainerState.RUNNING; case DONE: default: @@ -629,7 +721,8 @@ public NMContainerStatus getNMContainerStatus() { getCurrentState(), getResource(), diagnostics.toString(), exitCode, containerTokenIdentifier.getPriority(), containerTokenIdentifier.getCreationTime(), - containerTokenIdentifier.getNodeLabelExpression()); + containerTokenIdentifier.getNodeLabelExpression(), + containerTokenIdentifier.getExecutionType()); } finally { this.readLock.unlock(); } @@ -725,23 +818,38 @@ private void sendFinishedEvents() { @SuppressWarnings("unchecked") // dispatcher not typed @Override public void sendLaunchEvent() { - ContainersLauncherEventType launcherEvent = - ContainersLauncherEventType.LAUNCH_CONTAINER; - if (recoveredStatus == RecoveredContainerStatus.LAUNCHED) { - // try to recover a container that was previously launched - launcherEvent = ContainersLauncherEventType.RECOVER_CONTAINER; + if (ContainerState.PAUSED == getContainerState()) { + dispatcher.getEventHandler().handle( + new ContainerResumeEvent(containerId, + "Container Resumed as some resources freed up")); + } else { + ContainersLauncherEventType launcherEvent = + ContainersLauncherEventType.LAUNCH_CONTAINER; + if (recoveredStatus == RecoveredContainerStatus.LAUNCHED) { + // try to recover a container that was previously launched + launcherEvent = ContainersLauncherEventType.RECOVER_CONTAINER; + } + containerLaunchStartTime = clock.getTime(); + dispatcher.getEventHandler().handle( + new ContainersLauncherEvent(this, launcherEvent)); } - containerLaunchStartTime = clock.getTime(); - dispatcher.getEventHandler().handle( - new ContainersLauncherEvent(this, launcherEvent)); + } @SuppressWarnings("unchecked") // dispatcher not typed private void sendScheduleEvent() { - dispatcher.getEventHandler().handle( - new ContainerSchedulerEvent(this, - ContainerSchedulerEventType.SCHEDULE_CONTAINER) - ); + if (recoveredStatus == RecoveredContainerStatus.PAUSED) { + // Recovery is not supported for paused container so we raise the + // launch event which will proceed to kill the paused container instead + // of 
raising the schedule event. + ContainersLauncherEventType launcherEvent; + launcherEvent = ContainersLauncherEventType.RECOVER_PAUSED_CONTAINER; + dispatcher.getEventHandler() + .handle(new ContainersLauncherEvent(this, launcherEvent)); + } else { + dispatcher.getEventHandler().handle(new ContainerSchedulerEvent(this, + ContainerSchedulerEventType.SCHEDULE_CONTAINER)); + } } @SuppressWarnings("unchecked") // dispatcher not typed @@ -752,6 +860,13 @@ public void sendKillEvent(int exitStatus, String description) { new ContainerKillEvent(containerId, exitStatus, description)); } + @SuppressWarnings("unchecked") // dispatcher not typed + @Override + public void sendPauseEvent(String description) { + dispatcher.getEventHandler().handle( + new ContainerPauseEvent(containerId, description)); + } + @SuppressWarnings("unchecked") // dispatcher not typed private void sendRelaunchEvent() { ContainersLauncherEventType launcherEvent = @@ -1496,6 +1611,26 @@ public void transition(ContainerImpl container, ContainerEvent event) { } } + /** + * Transitions upon receiving PAUSE_CONTAINER. + * - LOCALIZED -> KILLING. + * - REINITIALIZING -> KILLING. + */ + @SuppressWarnings("unchecked") // dispatcher not typed + static class KillOnPauseTransition implements + SingleArcTransition { + + @SuppressWarnings("unchecked") + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + // Kill the process/process-grp + container.setIsReInitializing(false); + container.dispatcher.getEventHandler().handle( + new ContainersLauncherEvent(container, + ContainersLauncherEventType.CLEANUP_CONTAINER)); + } + } + /** * Transition from KILLING to CONTAINER_CLEANEDUP_AFTER_KILL * upon receiving CONTAINER_KILLED_ON_REQUEST. @@ -1686,6 +1821,57 @@ public void transition(ContainerImpl container, ContainerEvent event) { } } + /** + * Transitions upon receiving PAUSE_CONTAINER. + * - RUNNING -> PAUSING + */ + @SuppressWarnings("unchecked") // dispatcher not typed + static class PauseContainerTransition implements + SingleArcTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + // Pause the process/process-grp if it is supported by the container + container.dispatcher.getEventHandler().handle( + new ContainersLauncherEvent(container, + ContainersLauncherEventType.PAUSE_CONTAINER)); + ContainerPauseEvent pauseEvent = (ContainerPauseEvent) event; + container.addDiagnostics(pauseEvent.getDiagnostic(), "\n"); + } + } + + /** + * Transitions upon receiving PAUSED_CONTAINER. + */ + @SuppressWarnings("unchecked") // dispatcher not typed + static class PausedContainerTransition implements + SingleArcTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + // Container was PAUSED so tell the scheduler + container.dispatcher.getEventHandler().handle( + new ContainerSchedulerEvent(container, + ContainerSchedulerEventType.CONTAINER_PAUSED)); + } + } + + /** + * Transitions upon receiving RESUME_CONTAINER. 
+ * - PAUSED -> RUNNING + */ + @SuppressWarnings("unchecked") // dispatcher not typed + static class ResumeContainerTransition implements + SingleArcTransition { + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + // Pause the process/process-grp if it is supported by the container + container.dispatcher.getEventHandler().handle( + new ContainersLauncherEvent(container, + ContainersLauncherEventType.RESUME_CONTAINER)); + ContainerResumeEvent resumeEvent = (ContainerResumeEvent) event; + container.addDiagnostics(resumeEvent.getDiagnostic(), "\n"); + } + } + @Override public void handle(ContainerEvent event) { try { @@ -1789,4 +1975,14 @@ public boolean isRecovering() { getContainerState() == ContainerState.NEW); return isRecovering; } + + /** + * Get assigned resource mappings to the container. + * + * @return Resource Mappings of the container + */ + @Override + public ResourceMappings getResourceMappings() { + return resourceMappings; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerPauseEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerPauseEvent.java new file mode 100644 index 00000000000..898304e5e2a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerPauseEvent.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; + +import org.apache.hadoop.yarn.api.records.ContainerId; + +/** + * ContainerEvent for ContainerEventType.PAUSE_CONTAINER. 
+ */ +public class ContainerPauseEvent extends ContainerEvent { + + private final String diagnostic; + + public ContainerPauseEvent(ContainerId cId, + String diagnostic) { + super(cId, ContainerEventType.PAUSE_CONTAINER); + this.diagnostic = diagnostic; + } + + public String getDiagnostic() { + return this.diagnostic; + } +} + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResumeEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResumeEvent.java new file mode 100644 index 00000000000..d7c9e9ae798 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerResumeEvent.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; + +import org.apache.hadoop.yarn.api.records.ContainerId; + +/** + * ContainerEvent for ContainerEventType.RESUME_CONTAINER. 
+ */ +public class ContainerResumeEvent extends ContainerEvent { + + private final String diagnostic; + + public ContainerResumeEvent(ContainerId cId, + String diagnostic) { + super(cId, ContainerEventType.RESUME_CONTAINER); + this.diagnostic = diagnostic; + } + + public String getDiagnostic() { + return this.diagnostic; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java index 91d1356934d..7c3fea805bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerState.java @@ -21,5 +21,6 @@ public enum ContainerState { NEW, LOCALIZING, LOCALIZATION_FAILED, SCHEDULED, RUNNING, RELAUNCHING, REINITIALIZING, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE, KILLING, - CONTAINER_CLEANEDUP_AFTER_KILL, CONTAINER_RESOURCES_CLEANINGUP, DONE + CONTAINER_CLEANEDUP_AFTER_KILL, CONTAINER_RESOURCES_CLEANINGUP, DONE, + PAUSING, PAUSED, RESUMING } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java new file mode 100644 index 00000000000..d673341b01c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ResourceMappings.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.io.IOUtils; + +/** + * This class is used to store assigned resource to a single container by + * resource types. 
+ * + * Assigned resource could be list of String + * + * For example, we can assign container to: + * "numa": ["numa0"] + * "gpu": ["0", "1", "2", "3"] + * "fpga": ["1", "3"] + * + * This will be used for NM restart container recovery. + */ +public class ResourceMappings { + + private Map assignedResourcesMap = new HashMap<>(); + + /** + * Get all resource mappings. + * @param resourceType resourceType + * @return map of resource mapping + */ + public List getAssignedResources(String resourceType) { + AssignedResources ar = assignedResourcesMap.get(resourceType); + if (null == ar) { + return Collections.emptyList(); + } + return ar.getAssignedResources(); + } + + /** + * Adds the resources for a given resource type. + * + * @param resourceType Resource Type + * @param assigned Assigned resources to add + */ + public void addAssignedResources(String resourceType, + AssignedResources assigned) { + assignedResourcesMap.put(resourceType, assigned); + } + + /** + * Stores resources assigned to a container for a given resource type. + */ + public static class AssignedResources implements Serializable { + private static final long serialVersionUID = -1059491941955757926L; + private List resources = Collections.emptyList(); + + public List getAssignedResources() { + return Collections.unmodifiableList(resources); + } + + public void updateAssignedResources(List list) { + this.resources = new ArrayList<>(list); + } + + @SuppressWarnings("unchecked") + public static AssignedResources fromBytes(byte[] bytes) + throws IOException { + ObjectInputStream ois = null; + List resources; + try { + ByteArrayInputStream bis = new ByteArrayInputStream(bytes); + ois = new ObjectInputStream(bis); + resources = (List) ois.readObject(); + } catch (ClassNotFoundException e) { + throw new IOException(e); + } finally { + IOUtils.closeQuietly(ois); + } + AssignedResources ar = new AssignedResources(); + ar.updateAssignedResources(resources); + return ar; + } + + public byte[] toBytes() throws IOException { + ObjectOutputStream oos = null; + byte[] bytes; + try { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + oos = new ObjectOutputStream(bos); + oos.writeObject(resources); + bytes = bos.toByteArray(); + } finally { + IOUtils.closeQuietly(oos); + } + return bytes; + } + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 50443f31fdf..e2548873a6d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -75,6 +75,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; @@ -86,6 +87,7 @@ import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.yarn.util.ConverterUtils; public class ContainerLaunch implements Callable { @@ -106,8 +108,10 @@ public class ContainerLaunch implements Callable { private final Configuration conf; private final Context context; private final ContainerManagerImpl containerManager; - + protected AtomicBoolean containerAlreadyLaunched = new AtomicBoolean(false); + protected AtomicBoolean shouldPauseContainer = new AtomicBoolean(false); + protected AtomicBoolean completed = new AtomicBoolean(false); private volatile boolean killedBeforeStart = false; @@ -802,6 +806,106 @@ public static Signal translateCommandToSignal( return signal; } + /** + * Pause the container. + * Cancels the launch if the container isn't launched yet. Otherwise asks the + * executor to pause the container. + * @throws IOException in case of errors. + */ + @SuppressWarnings("unchecked") // dispatcher not typed + public void pauseContainer() throws IOException { + ContainerId containerId = container.getContainerId(); + String containerIdStr = containerId.toString(); + LOG.info("Pausing the container " + containerIdStr); + + // The pause event is only handled if the container is in the running state + // (the container state machine), so we don't check for + // shouldLaunchContainer over here + + if (!shouldPauseContainer.compareAndSet(false, true)) { + LOG.info("Container " + containerId + " not paused as " + + "resume already called"); + return; + } + + try { + // Pause the container + exec.pauseContainer(container); + + // PauseContainer is a blocking call. We are here almost means the + // container is paused, so send out the event. + dispatcher.getEventHandler().handle(new ContainerEvent( + containerId, + ContainerEventType.CONTAINER_PAUSED)); + + try { + this.context.getNMStateStore().storeContainerPaused( + container.getContainerId()); + } catch (IOException e) { + LOG.warn("Could not store container [" + container.getContainerId() + + "] state. The Container has been paused.", e); + } + } catch (Exception e) { + String message = + "Exception when trying to pause container " + containerIdStr + + ": " + StringUtils.stringifyException(e); + LOG.info(message); + container.handle(new ContainerKillEvent(container.getContainerId(), + ContainerExitStatus.PREEMPTED, "Container preempted as there was " + + " an exception in pausing it.")); + } + } + + /** + * Resume the container. + * Cancels the launch if the container isn't launched yet. Otherwise asks the + * executor to pause the container. + * @throws IOException in case of error. + */ + @SuppressWarnings("unchecked") // dispatcher not typed + public void resumeContainer() throws IOException { + ContainerId containerId = container.getContainerId(); + String containerIdStr = containerId.toString(); + LOG.info("Resuming the container " + containerIdStr); + + // The resume event is only handled if the container is in a paused state + // so we don't check for the launched flag here. 
+ + // paused flag will be set to true if process already paused + boolean alreadyPaused = !shouldPauseContainer.compareAndSet(false, true); + if (!alreadyPaused) { + LOG.info("Container " + containerIdStr + " not paused." + + " No resume necessary"); + return; + } + + // If the container has already started + try { + exec.resumeContainer(container); + // ResumeContainer is a blocking call. We are here almost means the + // container is resumed, so send out the event. + dispatcher.getEventHandler().handle(new ContainerEvent( + containerId, + ContainerEventType.CONTAINER_RESUMED)); + + try { + this.context.getNMStateStore().removeContainerPaused( + container.getContainerId()); + } catch (IOException e) { + LOG.warn("Could not store container [" + container.getContainerId() + + "] state. The Container has been resumed.", e); + } + } catch (Exception e) { + String message = + "Exception when trying to resume container " + containerIdStr + + ": " + StringUtils.stringifyException(e); + LOG.info(message); + container.handle(new ContainerKillEvent(container.getContainerId(), + ContainerExitStatus.PREEMPTED, "Container preempted as there was " + + " an exception in pausing it.")); + } + } + /** * Loop through for a time-bounded interval waiting to * read the process id from a file generated by a running process. @@ -1148,10 +1252,7 @@ public void sanitizeEnv(Map environment, Path pwd, environment.put(Environment.PWD.name(), pwd.toString()); - putEnvIfNotNull(environment, - Environment.HADOOP_CONF_DIR.name(), - System.getenv(Environment.HADOOP_CONF_DIR.name()) - ); + putEnvIfAbsent(environment, Environment.HADOOP_CONF_DIR.name()); if (!Shell.WINDOWS) { environment.put("JVM_PID", "$$"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java index 25909b9a2d6..9f6ef743d9b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.event.Dispatcher; @@ -41,6 +42,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import com.google.common.annotations.VisibleForTesting; @@ -137,6 +139,16 @@ public void handle(ContainersLauncherEvent event) { containerLauncher.submit(launch); 
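// Illustrative sketch (editorial aside, not part of the patch). It shows the
// two building blocks that pauseContainer()/resumeContainer() above rely on:
// an AtomicBoolean compareAndSet guard so pause and resume cannot both act,
// and an executor-level pause/resume hook. "SignalPausingExecutorSketch" is a
// hypothetical stand-in that stops/continues a process with signals; a real
// ContainerExecutor implementation would more likely use the cgroup freezer.
class SignalPausingExecutorSketch {
  private final java.util.concurrent.atomic.AtomicBoolean paused =
      new java.util.concurrent.atomic.AtomicBoolean(false);

  void pause(String pid) throws java.io.IOException {
    if (!paused.compareAndSet(false, true)) {
      return; // already paused, or a resume raced ahead; nothing to do
    }
    Runtime.getRuntime().exec(new String[] {"kill", "-STOP", pid});
  }

  void resume(String pid) throws java.io.IOException {
    if (!paused.compareAndSet(true, false)) {
      return; // never paused; nothing to resume
    }
    Runtime.getRuntime().exec(new String[] {"kill", "-CONT", pid});
  }

  public static void main(String[] args) throws java.io.IOException {
    SignalPausingExecutorSketch exec = new SignalPausingExecutorSketch();
    // Usage: pass the PID of a process you own, e.g. "12345".
    if (args.length == 1) {
      exec.pause(args[0]);
      exec.resume(args[0]);
    }
  }
}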
running.put(containerId, launch); break; + case RECOVER_PAUSED_CONTAINER: + // Recovery for paused containers is not supported, thus here + // we locate any paused containers, and terminate them. + app = context.getApplications().get( + containerId.getApplicationAttemptId().getApplicationId()); + launch = new RecoverPausedContainerLaunch(context, getConfig(), + dispatcher, exec, app, event.getContainer(), dirsHandler, + containerManager); + containerLauncher.submit(launch); + break; case CLEANUP_CONTAINER: case CLEANUP_CONTAINER_FOR_REINIT: ContainerLaunch launcher = running.remove(containerId); @@ -171,6 +183,36 @@ public void handle(ContainersLauncherEvent event) { + " with command " + signalEvent.getCommand()); } break; + case PAUSE_CONTAINER: + ContainerLaunch launchedContainer = running.get(containerId); + if (launchedContainer == null) { + // Container not launched. So nothing needs to be done. + return; + } + + // Pause the container + try { + launchedContainer.pauseContainer(); + } catch (Exception e) { + LOG.info("Got exception while pausing container: " + + StringUtils.stringifyException(e)); + } + break; + case RESUME_CONTAINER: + ContainerLaunch launchCont = running.get(containerId); + if (launchCont == null) { + // Container not launched. So nothing needs to be done. + return; + } + + // Resume the container. + try { + launchCont.resumeContainer(); + } catch (Exception e) { + LOG.info("Got exception while resuming container: " + + StringUtils.stringifyException(e)); + } + break; } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java index 380a032ca78..847ee34866e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncherEventType.java @@ -25,4 +25,8 @@ public enum ContainersLauncherEventType { CLEANUP_CONTAINER, // The process(grp) itself. CLEANUP_CONTAINER_FOR_REINIT, // The process(grp) itself. SIGNAL_CONTAINER, + PAUSE_CONTAINER, + RESUME_CONTAINER, + RECOVER_PAUSED_CONTAINER + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java new file mode 100644 index 00000000000..14cab9a2978 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.*; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; + +import java.io.File; +import java.io.IOException; +import java.io.InterruptedIOException; + +/** + * This is a ContainerLaunch which has been recovered after an NM restart for + * pause containers (for rolling upgrades) + */ +public class RecoverPausedContainerLaunch extends ContainerLaunch { + + private static final Log LOG = LogFactory.getLog( + RecoveredContainerLaunch.class); + + public RecoverPausedContainerLaunch(Context context, + Configuration configuration, Dispatcher dispatcher, + ContainerExecutor exec, Application app, Container container, + LocalDirsHandlerService dirsHandler, + ContainerManagerImpl containerManager) { + super(context, configuration, dispatcher, exec, app, container, dirsHandler, + containerManager); + } + + /** + * Cleanup the paused container by issuing a kill on it. 
+ */ + @SuppressWarnings("unchecked") + @Override + public Integer call() { + int retCode = ContainerExecutor.ExitCode.LOST.getExitCode(); + ContainerId containerId = container.getContainerId(); + String appIdStr = + containerId.getApplicationAttemptId().getApplicationId().toString(); + String containerIdStr = containerId.toString(); + + boolean notInterrupted = true; + try { + File pidFile = locatePidFile(appIdStr, containerIdStr); + if (pidFile != null) { + String pidPathStr = pidFile.getPath(); + pidFilePath = new Path(pidPathStr); + exec.activateContainer(containerId, pidFilePath); + exec.signalContainer(new ContainerSignalContext.Builder() + .setContainer(container) + .setUser(container.getUser()) + .setSignal(ContainerExecutor.Signal.KILL) + .build()); + } else { + LOG.warn("Unable to locate pid file for container " + containerIdStr); + } + + } catch (InterruptedIOException e) { + LOG.warn("Interrupted while waiting for exit code from " + containerId); + notInterrupted = false; + } catch (IOException e) { + LOG.error("Unable to kill the paused container " + containerIdStr, e); + } finally { + if (notInterrupted) { + this.completed.set(true); + exec.deactivateContainer(containerId); + try { + getContext().getNMStateStore() + .storeContainerCompleted(containerId, retCode); + } catch (IOException e) { + LOG.error("Unable to set exit code for container " + containerId); + } + } + } + + LOG.warn("Recovered container exited with a non-zero exit code " + + retCode); + this.dispatcher.getEventHandler().handle(new ContainerExitEvent( + containerId, + ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, retCode, + "Container exited with a non-zero exit code " + retCode)); + + return retCode; + } + + private File locatePidFile(String appIdStr, String containerIdStr) { + String pidSubpath= getPidFileSubpath(appIdStr, containerIdStr); + for (String dir : getContext().getLocalDirsHandler(). + getLocalDirsForRead()) { + File pidFile = new File(dir, pidSubpath); + if (pidFile.exists()) { + return pidFile; + } + } + return null; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java index 2eba0dffe4f..17ddd77857f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java @@ -40,10 +40,9 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReacquisitionContext; - /** * This is a ContainerLaunch which has been recovered after an NM restart (for - * rolling upgrades) + * rolling upgrades). 
*/ public class RecoveredContainerLaunch extends ContainerLaunch { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java index 44ae12b1059..f37dfd375b7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java @@ -367,17 +367,6 @@ public void initializeCGroupController(CGroupController controller) throws if (enableCGroupMount) { // We have a controller that needs to be mounted mountCGroupController(controller); - } else { - String controllerPath = getControllerPath(controller); - - if (controllerPath == null) { - throw new ResourceHandlerException( - String.format("Controller %s not mounted." - + " You either need to mount it with %s" - + " or mount cgroups before launching Yarn", - controller.getName(), YarnConfiguration. - NM_LINUX_CONTAINER_CGROUPS_MOUNT)); - } } // We are working with a pre-mounted contoller diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java index 52733344199..9fe4927b6e3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DelegatingLinuxContainerRuntime.java @@ -20,9 +20,11 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; @@ -31,6 +33,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.EnumSet; import java.util.Map; /** @@ -50,34 +53,62 @@ public class DelegatingLinuxContainerRuntime implements LinuxContainerRuntime { private DefaultLinuxContainerRuntime defaultLinuxContainerRuntime; private DockerLinuxContainerRuntime dockerLinuxContainerRuntime; private JavaSandboxLinuxContainerRuntime javaSandboxLinuxContainerRuntime; + private EnumSet 
allowedRuntimes = + EnumSet.noneOf(LinuxContainerRuntimeConstants.RuntimeType.class); @Override public void initialize(Configuration conf) throws ContainerExecutionException { - PrivilegedOperationExecutor privilegedOperationExecutor = - PrivilegedOperationExecutor.getInstance(conf); - defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime( - privilegedOperationExecutor); - defaultLinuxContainerRuntime.initialize(conf); - dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime( - privilegedOperationExecutor); - dockerLinuxContainerRuntime.initialize(conf); - javaSandboxLinuxContainerRuntime = new JavaSandboxLinuxContainerRuntime( - privilegedOperationExecutor); - javaSandboxLinuxContainerRuntime.initialize(conf); + String[] configuredRuntimes = conf.getTrimmedStrings( + YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, + YarnConfiguration.DEFAULT_LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES); + for (String configuredRuntime : configuredRuntimes) { + try { + allowedRuntimes.add( + LinuxContainerRuntimeConstants.RuntimeType.valueOf( + configuredRuntime.toUpperCase())); + } catch (IllegalArgumentException e) { + throw new ContainerExecutionException("Invalid runtime set in " + + YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES + " : " + + configuredRuntime); + } + } + if (isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX)) { + javaSandboxLinuxContainerRuntime = new JavaSandboxLinuxContainerRuntime( + PrivilegedOperationExecutor.getInstance(conf)); + javaSandboxLinuxContainerRuntime.initialize(conf); + } + if (isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DOCKER)) { + dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime( + PrivilegedOperationExecutor.getInstance(conf)); + dockerLinuxContainerRuntime.initialize(conf); + } + if (isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)) { + defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime( + PrivilegedOperationExecutor.getInstance(conf)); + defaultLinuxContainerRuntime.initialize(conf); + } } - private LinuxContainerRuntime pickContainerRuntime( - Map environment){ + @VisibleForTesting + LinuxContainerRuntime pickContainerRuntime( + Map environment) throws ContainerExecutionException { LinuxContainerRuntime runtime; //Sandbox checked first to ensure DockerRuntime doesn't circumvent controls - if (javaSandboxLinuxContainerRuntime.isSandboxContainerRequested()){ - runtime = javaSandboxLinuxContainerRuntime; - } else if (DockerLinuxContainerRuntime - .isDockerContainerRequested(environment)){ + if (javaSandboxLinuxContainerRuntime != null && + javaSandboxLinuxContainerRuntime.isSandboxContainerRequested()){ + runtime = javaSandboxLinuxContainerRuntime; + } else if (dockerLinuxContainerRuntime != null && + DockerLinuxContainerRuntime.isDockerContainerRequested(environment)){ runtime = dockerLinuxContainerRuntime; - } else { + } else if (defaultLinuxContainerRuntime != null && + !DockerLinuxContainerRuntime.isDockerContainerRequested(environment)) { runtime = defaultLinuxContainerRuntime; + } else { + throw new ContainerExecutionException("Requested runtime not allowed."); } if (LOG.isDebugEnabled()) { @@ -88,7 +119,8 @@ private LinuxContainerRuntime pickContainerRuntime( return runtime; } - private LinuxContainerRuntime pickContainerRuntime(Container container) { + private LinuxContainerRuntime pickContainerRuntime(Container container) + throws ContainerExecutionException { return 
pickContainerRuntime(container.getLaunchContext().getEnvironment()); } @@ -127,8 +159,15 @@ public void reapContainer(ContainerRuntimeContext ctx) } @Override - public String[] getIpAndHost(Container container) { + public String[] getIpAndHost(Container container) + throws ContainerExecutionException { LinuxContainerRuntime runtime = pickContainerRuntime(container); return runtime.getIpAndHost(container); } + + @VisibleForTesting + boolean isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType runtimeType) { + return allowedRuntimes.contains(runtimeType); + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java index 2e632faccc1..3a47523ba12 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java @@ -31,6 +31,15 @@ public final class LinuxContainerRuntimeConstants { private LinuxContainerRuntimeConstants() { } + /** + * Linux container runtime types for {@link DelegatingLinuxContainerRuntime}. + */ + public enum RuntimeType { + DEFAULT, + DOCKER, + JAVASANDBOX; + } + public static final Attribute LOCALIZED_RESOURCES = Attribute .attribute(Map.class, "localized_resources"); public static final Attribute CONTAINER_LAUNCH_PREFIX_COMMANDS = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index d764e1dd498..2b99cc7254b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher; import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; @@ -502,7 +503,8 @@ public void run() { * @param entry process tree entry to fill in */ private void initializeProcessTrees( - Entry entry) { + Entry entry) + throws 
ContainerExecutionException { ContainerId containerId = entry.getKey(); ProcessTreeInfo ptInfo = entry.getValue(); String pId = ptInfo.getPID(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java index b15690f808d..7caa0edf4de 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/runtime/ContainerRuntime.java @@ -77,6 +77,10 @@ void reapContainer(ContainerRuntimeContext ctx) /** * Return the host and ip of the container + * + * @param container the {@link Container} + * @throws ContainerExecutionException if an error occurs while getting the ip + * and hostname */ - String[] getIpAndHost(Container container); + String[] getIpAndHost(Container container) throws ContainerExecutionException; } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java index 644bdae77a3..830a06d5297 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -34,6 +35,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor .ChangeMonitoringContainerResourceEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; @@ -74,7 +76,7 @@ public class ContainerScheduler extends AbstractService implements queuedOpportunisticContainers = new LinkedHashMap<>(); // Used to keep track of containers that have been marked to be killed - // to make room for a guaranteed container. + // or paused to make room for a guaranteed container. private final Map oppContainersToKill = new HashMap<>(); @@ -98,6 +100,8 @@ public class ContainerScheduler extends AbstractService implements private final AsyncDispatcher dispatcher; private final NodeManagerMetrics metrics; + private Boolean usePauseEventForPreemption = false; + /** * Instantiate a Container Scheduler. * @param context NodeManager Context. 
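The hunks above introduce two new NodeManager knobs: the list of Linux container runtimes that DelegatingLinuxContainerRuntime may delegate to, and a flag telling the ContainerScheduler to pause rather than kill opportunistic containers. A minimal sketch of how these settings could be toggled programmatically follows; it is not part of the patch, the YarnConfiguration constants are taken from the hunks above, and the wrapper class and method names are invented for illustration.

// Illustrative sketch only -- not part of this patch.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class NodeManagerRuntimeConfigSketch {
  public static Configuration build() {
    Configuration conf = new Configuration();
    // Allow only the default and docker runtimes; a container that asks for
    // the JavaSandbox runtime would then be rejected by pickContainerRuntime()
    // with "Requested runtime not allowed."
    conf.setStrings(
        YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES,
        "default", "docker");
    // Ask the ContainerScheduler to pause (rather than kill) opportunistic
    // containers when a guaranteed container needs their resources.
    conf.setBoolean(
        YarnConfiguration.NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION, true);
    return conf;
  }
}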
@@ -112,6 +116,17 @@ public ContainerScheduler(Context context, AsyncDispatcher dispatcher, DEFAULT_NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH)); } + + @Override + public void serviceInit(Configuration conf) throws Exception { + super.serviceInit(conf); + this.usePauseEventForPreemption = + conf.getBoolean( + YarnConfiguration.NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION, + YarnConfiguration. + DEFAULT_NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION); + } + @VisibleForTesting public ContainerScheduler(Context context, AsyncDispatcher dispatcher, NodeManagerMetrics metrics, int qLength) { @@ -136,8 +151,9 @@ public void handle(ContainerSchedulerEvent event) { case SCHEDULE_CONTAINER: scheduleContainer(event.getContainer()); break; + case CONTAINER_PAUSED: case CONTAINER_COMPLETED: - onContainerCompleted(event.getContainer()); + onResourcesReclaimed(event.getContainer()); break; case UPDATE_CONTAINER: if (event instanceof UpdateContainerSchedulerEvent) { @@ -203,9 +219,9 @@ private void onUpdateContainer(UpdateContainerSchedulerEvent updateEvent) { queuedGuaranteedContainers.put(containerId, updateEvent.getContainer()); } - //Kill opportunistic containers if any to make room for + //Kill/pause opportunistic containers if any to make room for // promotion request - killOpportunisticContainers(updateEvent.getContainer()); + reclaimOpportunisticContainerResources(updateEvent.getContainer()); } else { // Demotion of queued container.. Should not happen too often // since you should not find too many queued guaranteed @@ -238,6 +254,17 @@ public int getNumQueuedOpportunisticContainers() { return this.queuedOpportunisticContainers.size(); } + @VisibleForTesting + public int getNumRunningContainers() { + return this.runningContainers.size(); + } + + @VisibleForTesting + public void setUsePauseEventForPreemption( + boolean usePauseEventForPreemption) { + this.usePauseEventForPreemption = usePauseEventForPreemption; + } + public OpportunisticContainersStatus getOpportunisticContainersStatus() { this.opportunisticContainersStatus.setQueuedOpportContainers( getNumQueuedOpportunisticContainers()); @@ -252,7 +279,7 @@ public OpportunisticContainersStatus getOpportunisticContainersStatus() { return this.opportunisticContainersStatus; } - private void onContainerCompleted(Container container) { + private void onResourcesReclaimed(Container container) { oppContainersToKill.remove(container.getContainerId()); // This could be killed externally for eg. by the ContainerManager, @@ -274,27 +301,49 @@ private void onContainerCompleted(Container container) { ExecutionType.OPPORTUNISTIC) { this.metrics.completeOpportunisticContainer(container.getResource()); } - startPendingContainers(); + startPendingContainers(false); } } - private void startPendingContainers() { + /** + * Start pending containers in the queue. + * @param forceStartGuaranteedContainers when this is true, start guaranteed + * containers without checking available resources + */ + private void startPendingContainers(boolean forceStartGuaranteedContainers) { // Start pending guaranteed containers, if resources available. - boolean resourcesAvailable = - startContainersFromQueue(queuedGuaranteedContainers.values()); + boolean resourcesAvailable = startContainers( + queuedGuaranteedContainers.values(), forceStartGuaranteedContainers); + // Resume opportunistic containers, if resource available.
+ if (resourcesAvailable) { + List<Container> pausedContainers = new ArrayList<>(); + Map<ContainerId, Container> containers = + context.getContainers(); + for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) { + ContainerId contId = entry.getKey(); + // Find containers that were not already started and are in paused state + if (!runningContainers.containsKey(contId)) { + if (containers.get(contId).getContainerState() + == ContainerState.PAUSED) { + pausedContainers.add(containers.get(contId)); + } + } + } + resourcesAvailable = startContainers(pausedContainers, false); + } // Start opportunistic containers, if resources available. if (resourcesAvailable) { - startContainersFromQueue(queuedOpportunisticContainers.values()); + startContainers(queuedOpportunisticContainers.values(), false); } } - private boolean startContainersFromQueue( - Collection<Container> queuedContainers) { - Iterator<Container> cIter = queuedContainers.iterator(); + private boolean startContainers( + Collection<Container> containersToBeStarted, boolean force) { + Iterator<Container> cIter = containersToBeStarted.iterator(); boolean resourcesAvailable = true; while (cIter.hasNext() && resourcesAvailable) { Container container = cIter.next(); - if (tryStartContainer(container)) { + if (tryStartContainer(container, force)) { cIter.remove(); } else { resourcesAvailable = false; @@ -303,9 +352,11 @@ private boolean startContainersFromQueue( return resourcesAvailable; } - private boolean tryStartContainer(Container container) { + private boolean tryStartContainer(Container container, boolean force) { boolean containerStarted = false; - if (resourceAvailableToStartContainer(container)) { + // call startContainer without checking available resources when force==true + if (force || resourceAvailableToStartContainer( + container)) { startContainer(container); containerStarted = true; } @@ -373,12 +424,17 @@ protected void scheduleContainer(Container container) { // enough number of opportunistic containers. if (isGuaranteedContainer) { enqueueContainer(container); - startPendingContainers(); + + // When opportunistic containers are not allowed (which is determined by + // max-queue length of pending opportunistic containers <= 0), start + // guaranteed containers without looking at available resources. + boolean forceStartGuaranteedContainers = (maxOppQueueLength <= 0); + startPendingContainers(forceStartGuaranteedContainers); // if the guaranteed container is queued, we need to preempt opportunistic // containers to make room for it if (queuedGuaranteedContainers.containsKey(container.getContainerId())) { - killOpportunisticContainers(container); + reclaimOpportunisticContainerResources(container); } } else { // Given an opportunistic container, we first try to start as many queuing @@ -386,29 +442,40 @@ protected void scheduleContainer(Container container) { // containers based on remaining resource available, then enqueue the // opportunistic container. If the container is enqueued, we do another // pass to try to start the newly enqueued opportunistic container. - startPendingContainers(); + startPendingContainers(false); boolean containerQueued = enqueueContainer(container); // container may not get queued because the max opportunistic container // queue length is reached.
If so, there is no point doing another pass if (containerQueued) { - startPendingContainers(); + startPendingContainers(false); } } } - private void killOpportunisticContainers(Container container) { - List<Container> extraOpportContainersToKill = - pickOpportunisticContainersToKill(container.getContainerId()); + @SuppressWarnings("unchecked") + private void reclaimOpportunisticContainerResources(Container container) { + List<Container> extraOppContainersToReclaim = + pickOpportunisticContainersToReclaimResources( + container.getContainerId()); // Kill the opportunistic containers that were chosen. - for (Container contToKill : extraOpportContainersToKill) { - contToKill.sendKillEvent( - ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER, - "Container Killed to make room for Guaranteed Container."); - oppContainersToKill.put(contToKill.getContainerId(), contToKill); + for (Container contToReclaim : extraOppContainersToReclaim) { + String preemptionAction = usePauseEventForPreemption ? "paused" : + "killed"; LOG.info( - "Opportunistic container {} will be killed in order to start the " + "Container {} will be {} to start the " + "execution of guaranteed container {}.", - contToKill.getContainerId(), container.getContainerId()); + contToReclaim.getContainerId(), preemptionAction, + container.getContainerId()); + + if (usePauseEventForPreemption) { + contToReclaim.sendPauseEvent( + "Container Paused to make room for Guaranteed Container"); + } else { + contToReclaim.sendKillEvent( + ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER, + "Container Killed to make room for Guaranteed Container."); + } + oppContainersToKill.put(contToReclaim.getContainerId(), contToReclaim); } } @@ -423,7 +490,7 @@ private void startContainer(Container container) { container.sendLaunchEvent(); } - private List<Container> pickOpportunisticContainersToKill( + private List<Container> pickOpportunisticContainersToReclaimResources( ContainerId containerToStartId) { // The opportunistic containers that need to be killed for the // given container to start.
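The reworked startPendingContainers() above encodes a fixed start order: queued guaranteed containers start first, previously paused containers are resumed next, and queued opportunistic containers only run with whatever capacity is left. The following toy model illustrates that ordering with capacity reduced to a simple slot counter; none of these class or method bodies appear in the patch, and it is a sketch for illustration only.

// Toy model of the guaranteed -> paused -> opportunistic start order.
import java.util.ArrayDeque;
import java.util.Deque;

public class SchedulerOrderingSketch {
  private final Deque<String> queuedGuaranteed = new ArrayDeque<>();
  private final Deque<String> paused = new ArrayDeque<>();
  private final Deque<String> queuedOpportunistic = new ArrayDeque<>();
  private int freeSlots;

  public SchedulerOrderingSketch(int freeSlots) {
    this.freeSlots = freeSlots;
  }

  public void enqueueGuaranteed(String id) { queuedGuaranteed.add(id); }
  public void pause(String id) { paused.add(id); freeSlots++; }
  public void enqueueOpportunistic(String id) { queuedOpportunistic.add(id); }

  // Mirrors the start order used by the patched startPendingContainers().
  public void startPendingContainers() {
    drain(queuedGuaranteed);
    drain(paused);
    drain(queuedOpportunistic);
  }

  private void drain(Deque<String> queue) {
    while (freeSlots > 0 && !queue.isEmpty()) {
      System.out.println("starting/resuming " + queue.poll());
      freeSlots--;
    }
  }
}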
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java index 917eda09af6..a9cbf745a81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerSchedulerEventType.java @@ -27,4 +27,5 @@ public enum ContainerSchedulerEventType { UPDATE_CONTAINER, // Producer: Node HB response - RM has asked to shed the queue SHED_QUEUED_CONTAINERS, + CONTAINER_PAUSED } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java index a31756e1c95..2f9c0a75065 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java @@ -24,6 +24,7 @@ import java.io.File; import java.io.IOException; +import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -39,6 +40,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainerRequestPBImpl; @@ -60,6 +62,7 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; import org.apache.hadoop.yarn.server.records.Version; import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl; import org.apache.hadoop.yarn.server.utils.LeveldbIterator; @@ -116,6 +119,7 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { private static final String CONTAINER_DIAGS_KEY_SUFFIX = "/diagnostics"; private static final String CONTAINER_LAUNCHED_KEY_SUFFIX = "/launched"; private static final String CONTAINER_QUEUED_KEY_SUFFIX = "/queued"; + private static final String CONTAINER_PAUSED_KEY_SUFFIX = "/paused"; private static final String CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX = "/resourceChanged"; private static final String CONTAINER_KILLED_KEY_SUFFIX = "/killed"; @@ -144,6 +148,9 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { private static final String 
AMRMPROXY_KEY_PREFIX = "AMRMProxy/"; + private static final String CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX = + "/assignedResources_"; + private static final byte[] EMPTY_VALUE = new byte[0]; private DB db; @@ -266,9 +273,16 @@ private RecoveredContainerState loadContainerState(ContainerId containerId, if (rcs.status == RecoveredContainerStatus.REQUESTED) { rcs.status = RecoveredContainerStatus.QUEUED; } + } else if (suffix.equals(CONTAINER_PAUSED_KEY_SUFFIX)) { + if ((rcs.status == RecoveredContainerStatus.LAUNCHED) + ||(rcs.status == RecoveredContainerStatus.QUEUED) + ||(rcs.status == RecoveredContainerStatus.REQUESTED)) { + rcs.status = RecoveredContainerStatus.PAUSED; + } } else if (suffix.equals(CONTAINER_LAUNCHED_KEY_SUFFIX)) { if ((rcs.status == RecoveredContainerStatus.REQUESTED) - || (rcs.status == RecoveredContainerStatus.QUEUED)) { + || (rcs.status == RecoveredContainerStatus.QUEUED) + ||(rcs.status == RecoveredContainerStatus.PAUSED)) { rcs.status = RecoveredContainerStatus.LAUNCHED; } } else if (suffix.equals(CONTAINER_KILLED_KEY_SUFFIX)) { @@ -286,6 +300,13 @@ private RecoveredContainerState loadContainerState(ContainerId containerId, rcs.setWorkDir(asString(entry.getValue())); } else if (suffix.equals(CONTAINER_LOG_DIR_KEY_SUFFIX)) { rcs.setLogDir(asString(entry.getValue())); + } else if (suffix.startsWith(CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX)) { + String resourceType = suffix.substring( + CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX.length()); + ResourceMappings.AssignedResources assignedResources = + ResourceMappings.AssignedResources.fromBytes(entry.getValue()); + rcs.getResourceMappings().addAssignedResources(resourceType, + assignedResources); } else { LOG.warn("the container " + containerId + " will be killed because of the unknown key " + key @@ -353,6 +374,37 @@ public void storeContainerQueued(ContainerId containerId) throws IOException { } } + @Override + public void storeContainerPaused(ContainerId containerId) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeContainerPaused: containerId=" + containerId); + } + + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + + CONTAINER_PAUSED_KEY_SUFFIX; + try { + db.put(bytes(key), EMPTY_VALUE); + } catch (DBException e) { + throw new IOException(e); + } + } + + @Override + public void removeContainerPaused(ContainerId containerId) + throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("removeContainerPaused: containerId=" + containerId); + } + + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + + CONTAINER_PAUSED_KEY_SUFFIX; + try { + db.delete(bytes(key)); + } catch (DBException e) { + throw new IOException(e); + } + } + @Override public void storeContainerDiagnostics(ContainerId containerId, StringBuilder diagnostics) throws IOException { @@ -497,6 +549,7 @@ public void removeContainer(ContainerId containerId) batch.delete(bytes(keyPrefix + CONTAINER_DIAGS_KEY_SUFFIX)); batch.delete(bytes(keyPrefix + CONTAINER_LAUNCHED_KEY_SUFFIX)); batch.delete(bytes(keyPrefix + CONTAINER_QUEUED_KEY_SUFFIX)); + batch.delete(bytes(keyPrefix + CONTAINER_PAUSED_KEY_SUFFIX)); batch.delete(bytes(keyPrefix + CONTAINER_KILLED_KEY_SUFFIX)); batch.delete(bytes(keyPrefix + CONTAINER_EXIT_CODE_KEY_SUFFIX)); List unknownKeysForContainer = containerUnknownKeySuffixes @@ -1091,6 +1144,35 @@ public void removeLogDeleter(ApplicationId appId) throws IOException { } } + @Override + public void storeAssignedResources(ContainerId containerId, + String resourceType, List assignedResources) + throws 
IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeAssignedResources: containerId=" + containerId + + ", assignedResources=" + StringUtils.join(",", assignedResources)); + } + + String keyResChng = CONTAINERS_KEY_PREFIX + containerId.toString() + + CONTAINER_ASSIGNED_RESOURCES_KEY_SUFFIX + resourceType; + try { + WriteBatch batch = db.createWriteBatch(); + try { + ResourceMappings.AssignedResources res = + new ResourceMappings.AssignedResources(); + res.updateAssignedResources(assignedResources); + + // New value will overwrite old values for the same key + batch.put(bytes(keyResChng), res.toBytes()); + db.write(batch); + } finally { + batch.close(); + } + } catch (DBException e) { + throw new IOException(e); + } + } + @SuppressWarnings("deprecation") private void cleanupDeprecatedFinishedApps() { try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java index 86dc99fdeaa..d1d0696935f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery; import java.io.IOException; +import java.io.Serializable; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -78,6 +79,15 @@ public void storeContainer(ContainerId containerId, int version, public void storeContainerQueued(ContainerId containerId) throws IOException { } + @Override + public void storeContainerPaused(ContainerId containerId) throws IOException { + } + + @Override + public void removeContainerPaused(ContainerId containerId) + throws IOException { + } + @Override public void storeContainerDiagnostics(ContainerId containerId, StringBuilder diagnostics) throws IOException { @@ -257,6 +267,12 @@ public void removeAMRMProxyAppContext(ApplicationAttemptId attempt) throws IOException { } + @Override + public void storeAssignedResources(ContainerId containerId, + String resourceType, List assignedResources) + throws IOException { + } + @Override protected void initStorage(Configuration conf) throws IOException { } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java index ec534bff7ec..999d2d9dbaf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery; import java.io.IOException; +import java.io.Serializable; import java.util.ArrayList; import 
java.util.HashMap; import java.util.List; @@ -42,6 +43,7 @@ import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto; import org.apache.hadoop.yarn.server.api.records.MasterKey; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; @Private @Unstable @@ -71,7 +73,8 @@ public enum RecoveredContainerStatus { REQUESTED, QUEUED, LAUNCHED, - COMPLETED + COMPLETED, + PAUSED } public static class RecoveredContainerState { @@ -88,6 +91,7 @@ public static class RecoveredContainerState { private RecoveredContainerType recoveryType = RecoveredContainerType.RECOVER; private long startTime; + private ResourceMappings resMappings = new ResourceMappings(); public RecoveredContainerStatus getStatus() { return status; @@ -172,6 +176,14 @@ public RecoveredContainerType getRecoveryType() { public void setRecoveryType(RecoveredContainerType recoveryType) { this.recoveryType = recoveryType; } + + public ResourceMappings getResourceMappings() { + return resMappings; + } + + public void setResourceMappings(ResourceMappings mappings) { + this.resMappings = mappings; + } } public static class LocalResourceTrackerState { @@ -338,9 +350,9 @@ public boolean isNewlyCreated() { } /** - * Load the state of applications - * @return recovered state for applications - * @throws IOException + * Load the state of applications. + * @return recovered state for applications. + * @throws IOException IO Exception. */ public abstract RecoveredApplicationsState loadApplicationsState() throws IOException; @@ -391,6 +403,23 @@ public abstract void storeContainer(ContainerId containerId, public abstract void storeContainerQueued(ContainerId containerId) throws IOException; + /** + * Record that a container has been paused at the NM. + * @param containerId the container ID. + * @throws IOException IO Exception. + */ + public abstract void storeContainerPaused(ContainerId containerId) + throws IOException; + + /** + * Record that a container has been resumed at the NM by removing the + * fact that it has be paused. + * @param containerId the container ID. + * @throws IOException IO Exception. + */ + public abstract void removeContainerPaused(ContainerId containerId) + throws IOException; + /** * Record that a container has been launched * @param containerId the container ID @@ -699,6 +728,18 @@ public abstract void removeAMRMProxyAppContextEntry( public abstract void removeAMRMProxyAppContext(ApplicationAttemptId attempt) throws IOException; + /** + * Store the assigned resources to a container. 
+ * + * @param containerId Container Id + * @param resourceType Resource Type + * @param assignedResources Assigned resources + * @throws IOException if fails + */ + public abstract void storeAssignedResources(ContainerId containerId, + String resourceType, List assignedResources) + throws IOException; + protected abstract void initStorage(Configuration conf) throws IOException; protected abstract void startStorage() throws IOException; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java index 32f73c85a0c..6fe5bbe73fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java @@ -21,10 +21,16 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; + +import java.util.Map; /** * Helper class to determine hardware related characteristics such as the @@ -240,8 +246,8 @@ private static int getVCoresInternal(ResourceCalculatorPlugin plugin, return cores; } - private static int getConfiguredMemoryMB(Configuration conf) { - int memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB, + private static long getConfiguredMemoryMB(Configuration conf) { + long memoryMb = conf.getLong(YarnConfiguration.NM_PMEM_MB, YarnConfiguration.DEFAULT_NM_PMEM_MB); if (memoryMb == -1) { memoryMb = YarnConfiguration.DEFAULT_NM_PMEM_MB; @@ -264,7 +270,7 @@ private static int getConfiguredMemoryMB(Configuration conf) { * - the configuration for the NodeManager * @return the amount of memory that will be used for YARN containers in MB. */ - public static int getContainerMemoryMB(Configuration conf) { + public static long getContainerMemoryMB(Configuration conf) { if (!isHardwareDetectionEnabled(conf)) { return getConfiguredMemoryMB(conf); } @@ -293,7 +299,7 @@ public static int getContainerMemoryMB(Configuration conf) { * - the configuration for the NodeManager * @return the amount of memory that will be used for YARN containers in MB. 
*/ - public static int getContainerMemoryMB(ResourceCalculatorPlugin plugin, + public static long getContainerMemoryMB(ResourceCalculatorPlugin plugin, Configuration conf) { if (!isHardwareDetectionEnabled(conf) || plugin == null) { return getConfiguredMemoryMB(conf); @@ -301,26 +307,24 @@ public static int getContainerMemoryMB(ResourceCalculatorPlugin plugin, return getContainerMemoryMBInternal(plugin, conf); } - private static int getContainerMemoryMBInternal(ResourceCalculatorPlugin plugin, + private static long getContainerMemoryMBInternal(ResourceCalculatorPlugin plugin, Configuration conf) { - int memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB, -1); + long memoryMb = conf.getInt(YarnConfiguration.NM_PMEM_MB, -1); if (memoryMb == -1) { - int physicalMemoryMB = - (int) (plugin.getPhysicalMemorySize() / (1024 * 1024)); - int hadoopHeapSizeMB = - (int) (Runtime.getRuntime().maxMemory() / (1024 * 1024)); - int containerPhysicalMemoryMB = - (int) (0.8f * (physicalMemoryMB - (2 * hadoopHeapSizeMB))); - int reservedMemoryMB = - conf.getInt(YarnConfiguration.NM_SYSTEM_RESERVED_PMEM_MB, -1); + long physicalMemoryMB = (plugin.getPhysicalMemorySize() / (1024 * 1024)); + long hadoopHeapSizeMB = (Runtime.getRuntime().maxMemory() + / (1024 * 1024)); + long containerPhysicalMemoryMB = (long) (0.8f + * (physicalMemoryMB - (2 * hadoopHeapSizeMB))); + long reservedMemoryMB = conf + .getInt(YarnConfiguration.NM_SYSTEM_RESERVED_PMEM_MB, -1); if (reservedMemoryMB != -1) { containerPhysicalMemoryMB = physicalMemoryMB - reservedMemoryMB; } - if(containerPhysicalMemoryMB <= 0) { + if (containerPhysicalMemoryMB <= 0) { LOG.error("Calculated memory for YARN containers is too low." + " Node memory is " + physicalMemoryMB - + " MB, system reserved memory is " - + reservedMemoryMB + " MB."); + + " MB, system reserved memory is " + reservedMemoryMB + " MB."); } containerPhysicalMemoryMB = Math.max(containerPhysicalMemoryMB, 0); memoryMb = containerPhysicalMemoryMB; @@ -332,4 +336,50 @@ private static int getContainerMemoryMBInternal(ResourceCalculatorPlugin plugin, } return memoryMb; } + + /** + * Get the resources for the node. 
+ * @param configuration configuration file + * @return the resources for the node + */ + public static Resource getNodeResources(Configuration configuration) { + Configuration conf = new Configuration(configuration); + String memory = ResourceInformation.MEMORY_MB.getName(); + String vcores = ResourceInformation.VCORES.getName(); + + Resource ret = Resource.newInstance(0, 0); + Map resourceInformation = + ResourceUtils.getNodeResourceInformation(conf); + for (Map.Entry entry : resourceInformation + .entrySet()) { + ret.setResourceInformation(entry.getKey(), entry.getValue()); + LOG.debug("Setting key " + entry.getKey() + " to " + entry.getValue()); + } + if (resourceInformation.containsKey(memory)) { + Long value = resourceInformation.get(memory).getValue(); + if (value > Integer.MAX_VALUE) { + throw new YarnRuntimeException("Value '" + value + + "' for resource memory is more than the maximum for an integer."); + } + ResourceInformation memResInfo = resourceInformation.get(memory); + if(memResInfo.getValue() == 0) { + ret.setMemorySize(getContainerMemoryMB(conf)); + LOG.debug("Set memory to " + ret.getMemorySize()); + } + } + if (resourceInformation.containsKey(vcores)) { + Long value = resourceInformation.get(vcores).getValue(); + if (value > Integer.MAX_VALUE) { + throw new YarnRuntimeException("Value '" + value + + "' for resource vcores is more than the maximum for an integer."); + } + ResourceInformation vcoresResInfo = resourceInformation.get(vcores); + if(vcoresResInfo.getValue() == 0) { + ret.setVirtualCores(getVCores(conf)); + LOG.debug("Set vcores to " + ret.getVirtualCores()); + } + } + LOG.debug("Node resource information map is " + ret); + return ret; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMContainerLogsInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMContainerLogsInfo.java index 5415e04eb47..193ec623f20 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMContainerLogsInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/NMContainerLogsInfo.java @@ -27,7 +27,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType; -import org.apache.hadoop.yarn.logaggregation.PerContainerLogFileInfo; +import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.webapp.ContainerLogsUtils; import org.apache.hadoop.yarn.server.webapp.dao.ContainerLogsInfo; @@ -55,10 +55,10 @@ public NMContainerLogsInfo(final Context nmContext, containerId, remoteUser, nmContext); } - private static List getContainerLogsInfo( + private static List getContainerLogsInfo( ContainerId id, String remoteUser, Context nmContext) throws YarnException { - List logFiles = new ArrayList<>(); + List logFiles = new ArrayList<>(); List logDirs = ContainerLogsUtils.getContainerLogDirs( id, remoteUser, nmContext); for (File containerLogsDir : logDirs) { @@ -66,7 +66,7 @@ private static List getContainerLogsInfo( if (logs != 
null) { for (File log : logs) { if (log.isFile()) { - PerContainerLogFileInfo logMeta = new PerContainerLogFileInfo( + ContainerLogFileInfo logMeta = new ContainerLogFileInfo( log.getName(), Long.toString(log.length()), Times.format(log.lastModified())); logFiles.add(logMeta); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index 8e4522baac8..9e594498ad6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ -159,7 +159,8 @@ public long getRMIdentifier() { containerManager.startContainers(allRequests); BaseContainerManagerTest.waitForContainerState(containerManager, cID, - Arrays.asList(ContainerState.RUNNING, ContainerState.SCHEDULED), 20); + Arrays.asList(ContainerState.RUNNING, ContainerState.SCHEDULED, + ContainerState.NEW), 20); List containerIds = new ArrayList(); containerIds.add(cID); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java index 2d390ac998a..92797116075 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManager.java @@ -25,6 +25,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider; import org.junit.Assert; import org.junit.Test; @@ -57,6 +60,71 @@ public void testContainerExecutorInitCall() { } } + private static int initCalls = 0; + private static int preCalls = 0; + private static int postCalls = 0; + + private static class DummyCSTListener1 + implements ContainerStateTransitionListener { + @Override + public void init(Context context) { + initCalls++; + } + + @Override + public void preTransition(ContainerImpl op, ContainerState beforeState, + ContainerEvent eventToBeProcessed) { + preCalls++; + } + + @Override + public void postTransition(ContainerImpl op, ContainerState beforeState, + ContainerState afterState, ContainerEvent processedEvent) { + postCalls++; + } + } + + private static class DummyCSTListener2 + implements ContainerStateTransitionListener { + @Override + public void init(Context context) { + initCalls++; + } + + @Override + public void preTransition(ContainerImpl op, ContainerState 
beforeState, + ContainerEvent eventToBeProcessed) { + preCalls++; + } + + @Override + public void postTransition(ContainerImpl op, ContainerState beforeState, + ContainerState afterState, ContainerEvent processedEvent) { + postCalls++; + } + } + + @Test + public void testListenerInitialization() throws Exception{ + NodeManager nodeManager = new NodeManager(); + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.NM_CONTAINER_STATE_TRANSITION_LISTENERS, + DummyCSTListener1.class.getName() + "," + + DummyCSTListener2.class.getName()); + initCalls = 0; + preCalls = 0; + postCalls = 0; + NodeManager.NMContext nmContext = + nodeManager.createNMContext(null, null, null, false, conf); + Assert.assertEquals(2, initCalls); + nmContext.getContainerStateTransitionListener().preTransition( + null, null, null); + nmContext.getContainerStateTransitionListener().postTransition( + null, null, null, null); + Assert.assertEquals(2, preCalls); + Assert.assertEquals(2, postCalls); + } + @Test public void testCreationOfNodeLabelsProviderService() throws InterruptedException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java index 7c8551e75b0..0838f1e523a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java @@ -33,6 +33,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.yarn.server.nodemanager.ContainerStateTransitionListener; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -765,5 +767,11 @@ public NMTimelinePublisher getNMTimelinePublisher() { public ContainerExecutor getContainerExecutor() { return null; } + + @Override + public ContainerStateTransitionListener + getContainerStateTransitionListener() { + return null; + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index f379c0897ca..6eea77b5a12 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -147,7 +147,7 @@ public int launchContainer(ContainerStartContext ctx) @Before public void setup() throws IOException { conf.setInt( - YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10); + YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 0); super.setup(); } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index 224e99cf9f8..5ec0ae64c32 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -31,6 +31,7 @@ import java.io.File; import java.io.IOException; import java.io.PrintWriter; +import java.io.Serializable; import java.nio.ByteBuffer; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; @@ -90,6 +91,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; @@ -108,6 +110,7 @@ import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.util.timeline.TimelineUtils; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -400,9 +403,8 @@ public void testContainerResizeRecovery() throws Exception { NMStateStoreService stateStore = new NMMemoryStateStoreService(); stateStore.init(conf); stateStore.start(); - Context context = createContext(conf, stateStore); + context = createContext(conf, stateStore); ContainerManagerImpl cm = createContainerManager(context, delSrvc); - cm.dispatcher.disableExitOnDispatchException(); cm.init(conf); cm.start(); // add an application by starting a container @@ -410,55 +412,12 @@ public void testContainerResizeRecovery() throws Exception { ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1); ContainerId cid = ContainerId.newContainerId(attemptId, 1); - Map containerEnv = new HashMap<>(); - setFlowContext(containerEnv, "app_name1", appId); - Map serviceData = Collections.emptyMap(); - Credentials containerCreds = new Credentials(); - DataOutputBuffer dob = new DataOutputBuffer(); - containerCreds.writeTokenStorageToStream(dob); - ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, - dob.getLength()); - Map acls = Collections.emptyMap(); - File tmpDir = new File("target", - this.getClass().getSimpleName() + "-tmpDir"); - File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); - PrintWriter fileWriter = new PrintWriter(scriptFile); - if (Shell.WINDOWS) { - fileWriter.println("@ping -n 100 127.0.0.1 >nul"); - } else { - fileWriter.write("\numask 0"); - fileWriter.write("\nexec sleep 100"); - } - fileWriter.close(); - FileContext 
localFS = FileContext.getLocalFSFileContext(); - URL resource_alpha = - URL.fromPath(localFS - .makeQualified(new Path(scriptFile.getAbsolutePath()))); - LocalResource rsrc_alpha = RecordFactoryProvider - .getRecordFactory(null).newRecordInstance(LocalResource.class); - rsrc_alpha.setResource(resource_alpha); - rsrc_alpha.setSize(-1); - rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION); - rsrc_alpha.setType(LocalResourceType.FILE); - rsrc_alpha.setTimestamp(scriptFile.lastModified()); - String destinationFile = "dest_file"; - Map localResources = new HashMap<>(); - localResources.put(destinationFile, rsrc_alpha); - List commands = - Arrays.asList(Shell.getRunScriptCommand(scriptFile)); - ContainerLaunchContext clc = ContainerLaunchContext.newInstance( - localResources, containerEnv, commands, serviceData, - containerTokens, acls); - StartContainersResponse startResponse = startContainer( - context, cm, cid, clc, null); - assertTrue(startResponse.getFailedRequests().isEmpty()); - assertEquals(1, context.getApplications().size()); + + commonLaunchContainer(appId, cid, cm); + Application app = context.getApplications().get(appId); assertNotNull(app); - // make sure the container reaches RUNNING state - waitForNMContainerState(cm, cid, - org.apache.hadoop.yarn.server.nodemanager - .containermanager.container.ContainerState.RUNNING); + Resource targetResource = Resource.newInstance(2048, 2); ContainerUpdateResponse updateResponse = updateContainers(context, cm, cid, targetResource); @@ -480,6 +439,58 @@ public void testContainerResizeRecovery() throws Exception { assertEquals(targetResource, containerStatus.getCapability()); } + @Test + public void testResourceMappingRecoveryForContainer() throws Exception { + conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true); + conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true); + NMStateStoreService stateStore = new NMMemoryStateStoreService(); + stateStore.init(conf); + stateStore.start(); + context = createContext(conf, stateStore); + ContainerManagerImpl cm = createContainerManager(context, delSrvc); + cm.init(conf); + cm.start(); + + // add an application by starting a container + ApplicationId appId = ApplicationId.newInstance(0, 1); + ApplicationAttemptId attemptId = + ApplicationAttemptId.newInstance(appId, 1); + ContainerId cid = ContainerId.newContainerId(attemptId, 1); + + commonLaunchContainer(appId, cid, cm); + + Application app = context.getApplications().get(appId); + assertNotNull(app); + + // store resource mapping of the container + List gpuResources = Arrays.asList("1", "2", "3"); + stateStore.storeAssignedResources(cid, "gpu", gpuResources); + List numaResources = Arrays.asList("numa1"); + stateStore.storeAssignedResources(cid, "numa", numaResources); + List fpgaResources = Arrays.asList("fpga1", "fpga2"); + stateStore.storeAssignedResources(cid, "fpga", fpgaResources); + + cm.stop(); + context = createContext(conf, stateStore); + cm = createContainerManager(context); + cm.init(conf); + cm.start(); + assertEquals(1, context.getApplications().size()); + app = context.getApplications().get(appId); + assertNotNull(app); + + Container nmContainer = context.getContainers().get(cid); + Assert.assertNotNull(nmContainer); + ResourceMappings resourceMappings = nmContainer.getResourceMappings(); + List assignedResource = resourceMappings + .getAssignedResources("gpu"); + Assert.assertTrue(assignedResource.equals(gpuResources)); + Assert.assertTrue( + 
resourceMappings.getAssignedResources("numa").equals(numaResources)); + Assert.assertTrue( + resourceMappings.getAssignedResources("fpga").equals(fpgaResources)); + } + @Test public void testContainerCleanupOnShutdown() throws Exception { ApplicationId appId = ApplicationId.newInstance(0, 1); @@ -552,6 +563,57 @@ public void testContainerCleanupOnShutdown() throws Exception { verify(cm, never()).handle(isA(CMgrCompletedAppsEvent.class)); } + private void commonLaunchContainer(ApplicationId appId, ContainerId cid, + ContainerManagerImpl cm) throws Exception { + Map containerEnv = new HashMap<>(); + setFlowContext(containerEnv, "app_name1", appId); + Map serviceData = Collections.emptyMap(); + Credentials containerCreds = new Credentials(); + DataOutputBuffer dob = new DataOutputBuffer(); + containerCreds.writeTokenStorageToStream(dob); + ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, + dob.getLength()); + Map acls = Collections.emptyMap(); + File tmpDir = new File("target", + this.getClass().getSimpleName() + "-tmpDir"); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + if (Shell.WINDOWS) { + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); + fileWriter.write("\nexec sleep 100"); + } + fileWriter.close(); + FileContext localFS = FileContext.getLocalFSFileContext(); + URL resource_alpha = + URL.fromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource rsrc_alpha = RecordFactoryProvider + .getRecordFactory(null).newRecordInstance(LocalResource.class); + rsrc_alpha.setResource(resource_alpha); + rsrc_alpha.setSize(-1); + rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION); + rsrc_alpha.setType(LocalResourceType.FILE); + rsrc_alpha.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = new HashMap<>(); + localResources.put(destinationFile, rsrc_alpha); + List commands = + Arrays.asList(Shell.getRunScriptCommand(scriptFile)); + ContainerLaunchContext clc = ContainerLaunchContext.newInstance( + localResources, containerEnv, commands, serviceData, + containerTokens, acls); + StartContainersResponse startResponse = startContainer( + context, cm, cid, clc, null); + assertTrue(startResponse.getFailedRequests().isEmpty()); + assertEquals(1, context.getApplications().size()); + // make sure the container reaches RUNNING state + waitForNMContainerState(cm, cid, + org.apache.hadoop.yarn.server.nodemanager + .containermanager.container.ContainerState.RUNNING); + } + private ContainerManagerImpl createContainerManager(Context context, DeletionService delSrvc) { return new ContainerManagerImpl(context, exec, delSrvc, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 33f460972f1..b44b5008288 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -71,7 +71,9 @@ import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; +import org.apache.hadoop.yarn.server.nodemanager.ContainerStateTransitionListener; import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -103,6 +105,7 @@ import org.junit.Assert; import org.junit.Test; import org.mockito.ArgumentMatcher; +import org.mockito.Mockito; public class TestContainer { @@ -119,6 +122,8 @@ public void testLocalizationRequest() throws Exception { try { wc = new WrappedContainer(7, 314159265358979L, 4344, "yak"); assertEquals(ContainerState.NEW, wc.c.getContainerState()); + ContainerImpl container = (ContainerImpl)wc.c; + assertEquals(org.apache.hadoop.yarn.api.records.ContainerState.NEW, container.getCurrentState()); wc.initContainer(); // Verify request for public/private resources to localizer @@ -128,6 +133,7 @@ public void testLocalizationRequest() throws Exception { LocalResourceVisibility.APPLICATION)); verify(wc.localizerBus).handle(argThat(matchesReq)); assertEquals(ContainerState.LOCALIZING, wc.c.getContainerState()); + assertEquals(org.apache.hadoop.yarn.api.records.ContainerState.SCHEDULED, container.getCurrentState()); } finally { if (wc != null) { @@ -205,6 +211,42 @@ public void testExternalKill() throws Exception { } } + @Test + @SuppressWarnings("unchecked") // mocked generic + public void testContainerPauseAndResume() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(13, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + int running = metrics.getRunningContainers(); + wc.launchContainer(); + assertEquals(running + 1, metrics.getRunningContainers()); + reset(wc.localizerBus); + wc.pauseContainer(); + assertEquals(ContainerState.PAUSED, + wc.c.getContainerState()); + wc.resumeContainer(); + assertEquals(ContainerState.RUNNING, + wc.c.getContainerState()); + wc.containerKilledOnRequest(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + int failed = metrics.getFailedContainers(); + wc.containerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(failed + 1, metrics.getFailedContainers()); + assertEquals(running, metrics.getRunningContainers()); + } + finally { + if (wc != null) { + wc.finished(); + } + } + } + @Test @SuppressWarnings("unchecked") // mocked generic public void testCleanupOnFailure() throws Exception { @@ -250,6 +292,29 @@ public void testCleanupOnSuccess() throws Exception { assertEquals(ContainerState.DONE, wc.c.getContainerState()); assertEquals(completed + 1, metrics.getCompletedContainers()); assertEquals(running, metrics.getRunningContainers()); + + ContainerEventType e1 = wc.initStateToEvent.get(ContainerState.NEW); + ContainerState s2 = wc.eventToFinalState.get(e1); + ContainerEventType 
e2 = wc.initStateToEvent.get(s2); + ContainerState s3 = wc.eventToFinalState.get(e2); + ContainerEventType e3 = wc.initStateToEvent.get(s3); + ContainerState s4 = wc.eventToFinalState.get(e3); + ContainerEventType e4 = wc.initStateToEvent.get(s4); + ContainerState s5 = wc.eventToFinalState.get(e4); + ContainerEventType e5 = wc.initStateToEvent.get(s5); + ContainerState s6 = wc.eventToFinalState.get(e5); + + Assert.assertEquals(ContainerState.LOCALIZING, s2); + Assert.assertEquals(ContainerState.SCHEDULED, s3); + Assert.assertEquals(ContainerState.RUNNING, s4); + Assert.assertEquals(ContainerState.EXITED_WITH_SUCCESS, s5); + Assert.assertEquals(ContainerState.DONE, s6); + + Assert.assertEquals(ContainerEventType.INIT_CONTAINER, e1); + Assert.assertEquals(ContainerEventType.RESOURCE_LOCALIZED, e2); + Assert.assertEquals(ContainerEventType.CONTAINER_LAUNCHED, e3); + Assert.assertEquals(ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, e4); + Assert.assertEquals(ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, e5); } finally { if (wc != null) { @@ -364,6 +429,10 @@ public void testKillOnNew() throws Exception { Assert.assertTrue( containerMetrics.finishTime.value() > containerMetrics.startTime .value()); + Assert.assertEquals(ContainerEventType.KILL_CONTAINER, + wc.initStateToEvent.get(ContainerState.NEW)); + Assert.assertEquals(ContainerState.DONE, + wc.eventToFinalState.get(ContainerEventType.KILL_CONTAINER)); } finally { if (wc != null) { wc.finished(); @@ -905,6 +974,10 @@ private class WrappedContainer { final Map localResources; final Map serviceData; final Context context = mock(Context.class); + private final Map initStateToEvent = + new HashMap<>(); + private final Map eventToFinalState = + new HashMap<>(); WrappedContainer(int appId, long timestamp, int id, String user) throws IOException { @@ -955,6 +1028,8 @@ protected void scheduleContainer(Container container) { NodeStatusUpdater nodeStatusUpdater = mock(NodeStatusUpdater.class); when(context.getNodeStatusUpdater()).thenReturn(nodeStatusUpdater); ContainerExecutor executor = mock(ContainerExecutor.class); + Mockito.doNothing().when(executor).pauseContainer(any(Container.class)); + Mockito.doNothing().when(executor).resumeContainer(any(Container.class)); launcher = new ContainersLauncher(context, dispatcher, executor, null, null); // create a mock ExecutorService, which will not really launch @@ -1009,7 +1084,27 @@ protected void scheduleContainer(Container container) { } when(ctxt.getServiceData()).thenReturn(serviceData); when(ctxt.getContainerRetryContext()).thenReturn(containerRetryContext); + ContainerStateTransitionListener listener = + new ContainerStateTransitionListener() { + @Override + public void init(Context cntxt) {} + @Override + public void preTransition(ContainerImpl op, ContainerState beforeState, + ContainerEvent eventToBeProcessed) { + initStateToEvent.put(beforeState, eventToBeProcessed.getType()); + } + + @Override + public void postTransition(ContainerImpl op, ContainerState beforeState, + ContainerState afterState, ContainerEvent processedEvent) { + eventToFinalState.put(processedEvent.getType(), afterState); + } + }; + NodeManager.DefaultContainerStateListener multi = + new NodeManager.DefaultContainerStateListener(); + multi.addListener(listener); + when(context.getContainerStateTransitionListener()).thenReturn(multi); c = new ContainerImpl(conf, dispatcher, ctxt, null, metrics, identifier, context); dispatcher.register(ContainerEventType.class, @@ -1143,6 +1238,18 @@ public void killContainer() { 
drainDispatcherEvents(); } + public void pauseContainer() { + c.handle(new ContainerPauseEvent(cId, + "PauseRequest")); + drainDispatcherEvents(); + } + + public void resumeContainer() { + c.handle(new ContainerResumeEvent(cId, + "ResumeRequest")); + drainDispatcherEvents(); + } + public void containerKilledOnRequest() { int exitCode = ContainerExitStatus.KILLED_BY_RESOURCEMANAGER; String diagnosticMsg = "Container completed with exit code " + exitCode; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 085b60f9a32..7176942303d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -46,6 +46,7 @@ import java.util.jar.JarFile; import java.util.jar.Manifest; +import com.google.common.base.Supplier; import com.google.common.collect.Lists; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; @@ -54,6 +55,7 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.SecretManager.InvalidToken; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.StringUtils; @@ -724,11 +726,15 @@ public void testContainerEnvVariables() throws Exception { userSetEnv.put(Environment.LOGNAME.name(), "user_set_LOGNAME"); userSetEnv.put(Environment.PWD.name(), "user_set_PWD"); userSetEnv.put(Environment.HOME.name(), "user_set_HOME"); + final String userConfDir = "user_set_HADOOP_CONF_DIR"; + userSetEnv.put(Environment.HADOOP_CONF_DIR.name(), userConfDir); containerLaunchContext.setEnvironment(userSetEnv); File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); PrintWriter fileWriter = new PrintWriter(scriptFile); File processStartFile = + new File(tmpDir, "env_vars.tmp").getAbsoluteFile(); + final File processFinalFile = new File(tmpDir, "env_vars.txt").getAbsoluteFile(); if (Shell.WINDOWS) { fileWriter.println("@echo " + Environment.CONTAINER_ID.$() + "> " @@ -749,6 +755,8 @@ public void testContainerEnvVariables() throws Exception { + processStartFile); fileWriter.println("@echo " + Environment.HOME.$() + ">> " + processStartFile); + fileWriter.println("@echo " + Environment.HADOOP_CONF_DIR.$() + ">> " + + processStartFile); for (String serviceName : containerManager.getAuxServiceMetaData() .keySet()) { fileWriter.println("@echo %" + AuxiliaryServiceHelper.NM_AUX_SERVICE @@ -756,6 +764,8 @@ public void testContainerEnvVariables() throws Exception { + processStartFile); } fileWriter.println("@echo " + cId + ">> " + processStartFile); + fileWriter.println("@move /Y " + processStartFile + " " + + processFinalFile); fileWriter.println("@ping -n 100 127.0.0.1 >nul"); } else { fileWriter.write("\numask 0"); // So that start file is readable by the test @@ -777,6 +787,8 @@ public void 
testContainerEnvVariables() throws Exception { + processStartFile); fileWriter.write("\necho $" + Environment.HOME.name() + " >> " + processStartFile); + fileWriter.write("\necho $" + Environment.HADOOP_CONF_DIR.name() + " >> " + + processStartFile); for (String serviceName : containerManager.getAuxServiceMetaData() .keySet()) { fileWriter.write("\necho $" + AuxiliaryServiceHelper.NM_AUX_SERVICE @@ -784,6 +796,7 @@ public void testContainerEnvVariables() throws Exception { + processStartFile); } fileWriter.write("\necho $$ >> " + processStartFile); + fileWriter.write("\nmv " + processStartFile + " " + processFinalFile); fileWriter.write("\nexec sleep 100"); } fileWriter.close(); @@ -817,13 +830,12 @@ public void testContainerEnvVariables() throws Exception { StartContainersRequest.newInstance(list); containerManager.startContainers(allRequests); - int timeoutSecs = 0; - while (!processStartFile.exists() && timeoutSecs++ < 20) { - Thread.sleep(1000); - LOG.info("Waiting for process start-file to be created"); - } - Assert.assertTrue("ProcessStartFile doesn't exist!", - processStartFile.exists()); + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return processFinalFile.exists(); + } + }, 10, 20000); // Now verify the contents of the file List localDirs = dirsHandler.getLocalDirs(); @@ -843,7 +855,7 @@ public void testContainerEnvVariables() throws Exception { containerLogDirs.add(logDir + Path.SEPARATOR + relativeContainerLogDir); } BufferedReader reader = - new BufferedReader(new FileReader(processStartFile)); + new BufferedReader(new FileReader(processFinalFile)); Assert.assertEquals(cId.toString(), reader.readLine()); Assert.assertEquals(context.getNodeId().getHost(), reader.readLine()); Assert.assertEquals(String.valueOf(context.getNodeId().getPort()), @@ -866,7 +878,7 @@ public void testContainerEnvVariables() throws Exception { YarnConfiguration.NM_USER_HOME_DIR, YarnConfiguration.DEFAULT_NM_USER_HOME_DIR), reader.readLine()); - + Assert.assertEquals(userConfDir, reader.readLine()); for (String serviceName : containerManager.getAuxServiceMetaData().keySet()) { Assert.assertEquals( containerManager.getAuxServiceMetaData().get(serviceName), @@ -905,6 +917,8 @@ public void testContainerEnvVariables() throws Exception { YarnConfiguration.DEFAULT_NM_USER_HOME_DIR), containerLaunchContext.getEnvironment() .get(Environment.HOME.name())); + Assert.assertEquals(userConfDir, containerLaunchContext.getEnvironment() + .get(Environment.HADOOP_CONF_DIR.name())); // Get the pid of the process String pid = reader.readLine().trim(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java new file mode 100644 index 00000000000..7f4bbc4d375 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDelegatingLinuxContainerRuntime.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntime; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants; +import org.junit.Before; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.*; + +/** + * Test container runtime delegation. + */ +public class TestDelegatingLinuxContainerRuntime { + + private DelegatingLinuxContainerRuntime delegatingLinuxContainerRuntime; + private Configuration conf; + private Map env = new HashMap<>(); + + @Before + public void setUp() throws Exception { + delegatingLinuxContainerRuntime = new DelegatingLinuxContainerRuntime(); + conf = new Configuration(); + env.clear(); + } + + @Test + public void testIsRuntimeAllowedDefault() throws Exception { + conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, + YarnConfiguration.DEFAULT_LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES[0]); + System.out.println(conf.get( + YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES)); + delegatingLinuxContainerRuntime.initialize(conf); + assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)); + assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DOCKER)); + assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX)); + } + + @Test + public void testIsRuntimeAllowedDocker() throws Exception { + conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, + "docker"); + delegatingLinuxContainerRuntime.initialize(conf); + assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DOCKER)); + assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)); + assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX)); + } + + @Test + public void testIsRuntimeAllowedJavaSandbox() throws Exception { + conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, + "javasandbox"); + delegatingLinuxContainerRuntime.initialize(conf); + assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX)); + assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)); + assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DOCKER)); + } + + @Test + public void testIsRuntimeAllowedMultiple() throws Exception { + 
conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, + "docker,javasandbox"); + delegatingLinuxContainerRuntime.initialize(conf); + assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DOCKER)); + assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX)); + assertFalse(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)); + } + + @Test + public void testIsRuntimeAllowedAll() throws Exception { + conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, + "default,docker,javasandbox"); + delegatingLinuxContainerRuntime.initialize(conf); + assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DEFAULT)); + assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.DOCKER)); + assertTrue(delegatingLinuxContainerRuntime.isRuntimeAllowed( + LinuxContainerRuntimeConstants.RuntimeType.JAVASANDBOX)); + } + + @Test + public void testJavaSandboxNotAllowedButPermissive() throws Exception { + conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, + "default,docker"); + conf.set(YarnConfiguration.YARN_CONTAINER_SANDBOX, "permissive"); + delegatingLinuxContainerRuntime.initialize(conf); + ContainerRuntime runtime = + delegatingLinuxContainerRuntime.pickContainerRuntime(env); + assertTrue(runtime instanceof DefaultLinuxContainerRuntime); + } + + @Test + public void testJavaSandboxNotAllowedButPermissiveDockerRequested() + throws Exception { + env.put(ContainerRuntimeConstants.ENV_CONTAINER_TYPE, "docker"); + conf.set(YarnConfiguration.LINUX_CONTAINER_RUNTIME_ALLOWED_RUNTIMES, + "default,docker"); + conf.set(YarnConfiguration.YARN_CONTAINER_SANDBOX, "permissive"); + delegatingLinuxContainerRuntime.initialize(conf); + ContainerRuntime runtime = + delegatingLinuxContainerRuntime.pickContainerRuntime(env); + assertTrue(runtime instanceof DockerLinuxContainerRuntime); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerBehaviorCompatibility.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerBehaviorCompatibility.java new file mode 100644 index 00000000000..5b99285d2e5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerBehaviorCompatibility.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; + +import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Make sure ContainerScheduler related changes are compatible + * with old behavior. + */ +public class TestContainerSchedulerBehaviorCompatibility + extends BaseContainerManagerTest { + public TestContainerSchedulerBehaviorCompatibility() + throws UnsupportedFileSystemException { + super(); + } + + @Before + public void setup() throws IOException { + conf.setInt(YarnConfiguration.NM_VCORES, 1); + conf.setInt(YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, + 0); + super.setup(); + } + + @Test + public void testForceStartGuaranteedContainersWhenOppContainerDisabled() + throws Exception { + containerManager.start(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + containerLaunchContext.setCommands(Arrays.asList("echo")); + + List list = new ArrayList<>(); + + // Add a container start request with #vcores > available (1). + // This could happen when DefaultContainerCalculator configured because + // on the RM side it won't check vcores at all. + list.add(StartContainerRequest.newInstance(containerLaunchContext, + createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER, + context.getNodeId(), user, BuilderUtils.newResource(2048, 4), + context.getContainerTokenSecretManager(), null, + ExecutionType.GUARANTEED))); + + StartContainersRequest allRequests = + StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + ContainerScheduler cs = containerManager.getContainerScheduler(); + int nQueuedContainers = cs.getNumQueuedContainers(); + int nRunningContainers = cs.getNumRunningContainers(); + + // Wait at most 10 secs and we expect all containers finished. 
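// Hedged aside, not part of the patch above: the hand-rolled polling loop that
// follows could equivalently be written with GenericTestUtils.waitFor, the
// helper this same patch adopts in TestContainerLaunch. A minimal sketch of
// the same 10-second wait, assuming the surrounding test context (cs is the
// ContainerScheduler obtained above):
//
//   GenericTestUtils.waitFor(new Supplier<Boolean>() {
//     @Override
//     public Boolean get() {
//       return cs.getNumQueuedContainers() == 0
//           && cs.getNumRunningContainers() == 0;
//     }
//   }, 100, 10000);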
+ int maxTry = 100; + int nTried = 1; + while (nQueuedContainers != 0 || nRunningContainers != 0) { + Thread.sleep(100); + nQueuedContainers = cs.getNumQueuedContainers(); + nRunningContainers = cs.getNumRunningContainers(); + nTried++; + if (nTried > maxTry) { + Assert.fail("Failed to get either number of queuing containers to 0 or " + + "number of running containers to 0, #queued=" + nQueuedContainers + + ", #running=" + nRunningContainers); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java index 96765683871..f3fc724bad8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerQueuing.java @@ -23,6 +23,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; @@ -49,6 +51,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; @@ -124,18 +127,38 @@ public long getVCoresAllocatedForContainers() { @Override protected ContainerExecutor createContainerExecutor() { DefaultContainerExecutor exec = new DefaultContainerExecutor() { + ConcurrentMap oversleepMap = + new ConcurrentHashMap(); @Override public int launchContainer(ContainerStartContext ctx) throws IOException, ConfigurationException { + oversleepMap.put(ctx.getContainer().getContainerId().toString(), false); if (delayContainers) { try { Thread.sleep(10000); + if(oversleepMap.get(ctx.getContainer().getContainerId().toString()) + == true) { + Thread.sleep(10000); + } } catch (InterruptedException e) { // Nothing.. } } return super.launchContainer(ctx); } + + @Override + public void pauseContainer(Container container) { + // To mimic pausing we force the container to be in the PAUSED state + // a little longer by oversleeping. 
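// Descriptive note (not in the original patch, grounded in the overridden
// launchContainer above): the launching thread re-checks this flag after its
// first 10-second sleep, so flipping it to true here keeps the "paused"
// container occupying the executor for roughly another 10 seconds -- long
// enough for the test to observe the PAUSED state before the container
// resumes and finishes.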
+ oversleepMap.put(container.getContainerId().toString(), true); + LOG.info("Container was paused"); + } + + @Override + public void resumeContainer(Container container) { + LOG.info("Container was resumed"); + } }; exec.setConf(conf); return spy(exec); @@ -505,6 +528,86 @@ public void testKillOpportunisticForGuaranteedContainer() throws Exception { contStatus1.getState()); } + /** + * Submit two OPPORTUNISTIC and one GUARANTEED containers. The resources + * requests by each container as such that only one can run in parallel. + * Thus, the OPPORTUNISTIC container that started running, will be + * paused for the GUARANTEED container to start. + * Once the GUARANTEED container finishes its execution, the remaining + * OPPORTUNISTIC container will be executed. + * @throws Exception + */ + @Test + public void testPauseOpportunisticForGuaranteedContainer() throws Exception { + containerManager.start(); + containerManager.getContainerScheduler(). + setUsePauseEventForPreemption(true); + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + + List list = new ArrayList<>(); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(2048, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.OPPORTUNISTIC))); + + StartContainersRequest allRequests = + StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + + BaseContainerManagerTest.waitForNMContainerState(containerManager, + createContainerId(0), ContainerState.RUNNING, 40); + + list = new ArrayList<>(); + list.add(StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER, + context.getNodeId(), + user, BuilderUtils.newResource(2048, 1), + context.getContainerTokenSecretManager(), null, + ExecutionType.GUARANTEED))); + allRequests = + StartContainersRequest.newInstance(list); + + containerManager.startContainers(allRequests); + + BaseContainerManagerTest.waitForNMContainerState(containerManager, + createContainerId(1), ContainerState.RUNNING, 40); + + // Get container statuses. Container 0 should be paused, container 1 + // should be running. + List statList = new ArrayList(); + for (int i = 0; i < 2; i++) { + statList.add(createContainerId(i)); + } + GetContainerStatusesRequest statRequest = + GetContainerStatusesRequest.newInstance(statList); + List containerStatuses = containerManager + .getContainerStatuses(statRequest).getContainerStatuses(); + for (ContainerStatus status : containerStatuses) { + if (status.getContainerId().equals(createContainerId(0))) { + Assert.assertTrue(status.getDiagnostics().contains( + "Container Paused to make room for Guaranteed Container")); + } else if (status.getContainerId().equals(createContainerId(1))) { + Assert.assertEquals( + org.apache.hadoop.yarn.api.records.ContainerState.RUNNING, + status.getState()); + } + System.out.println("\nStatus : [" + status + "]\n"); + } + + // Make sure that the GUARANTEED container completes + BaseContainerManagerTest.waitForNMContainerState(containerManager, + createContainerId(1), ContainerState.DONE, 40); + // Make sure that the PAUSED opportunistic container resumes and + // starts running + BaseContainerManagerTest.waitForNMContainerState(containerManager, + createContainerId(0), ContainerState.DONE, 40); + } + /** * 1. 
Submit a long running GUARANTEED container to hog all NM resources. * 2. Submit 6 OPPORTUNISTIC containers, all of which will be queued. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java index c1638df7b5a..59a225ad188 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.recovery; import java.io.IOException; +import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -40,6 +41,7 @@ import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LogDeleterProto; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; public class NMMemoryStateStoreService extends NMStateStoreService { private Map apps; @@ -119,6 +121,7 @@ public synchronized List loadContainersState() rcsCopy.setRemainingRetryAttempts(rcs.getRemainingRetryAttempts()); rcsCopy.setWorkDir(rcs.getWorkDir()); rcsCopy.setLogDir(rcs.getLogDir()); + rcsCopy.setResourceMappings(rcs.getResourceMappings()); result.add(rcsCopy); } return result; @@ -141,6 +144,19 @@ public void storeContainerQueued(ContainerId containerId) throws IOException { rcs.status = RecoveredContainerStatus.QUEUED; } + @Override + public void storeContainerPaused(ContainerId containerId) throws IOException { + RecoveredContainerState rcs = getRecoveredContainerState(containerId); + rcs.status = RecoveredContainerStatus.PAUSED; + } + + @Override + public void removeContainerPaused(ContainerId containerId) + throws IOException { + RecoveredContainerState rcs = getRecoveredContainerState(containerId); + rcs.status = RecoveredContainerStatus.LAUNCHED; + } + @Override public synchronized void storeContainerDiagnostics(ContainerId containerId, StringBuilder diagnostics) throws IOException { @@ -480,6 +496,17 @@ public synchronized void removeAMRMProxyAppContext( amrmProxyState.getAppContexts().remove(attempt); } + @Override + public void storeAssignedResources(ContainerId containerId, + String resourceType, List assignedResources) + throws IOException { + ResourceMappings.AssignedResources ar = + new ResourceMappings.AssignedResources(); + ar.updateAssignedResources(assignedResources); + containerStates.get(containerId).getResourceMappings() + .addAssignedResources(resourceType, ar); + } + private static class TrackerState { Map inProgressMap = new HashMap(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java index 
b0a9bc92b1e..8c13356cb22 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java @@ -32,6 +32,7 @@ import java.io.File; import java.io.IOException; +import java.io.Serializable; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -288,6 +289,23 @@ public void testContainerStorage() throws IOException { assertEquals(containerReq, rcs.getStartRequest()); assertEquals(diags.toString(), rcs.getDiagnostics()); + // pause the container, and verify recovered + stateStore.storeContainerPaused(containerId); + restartStateStore(); + recoveredContainers = stateStore.loadContainersState(); + assertEquals(1, recoveredContainers.size()); + rcs = recoveredContainers.get(0); + assertEquals(RecoveredContainerStatus.PAUSED, rcs.getStatus()); + assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode()); + assertEquals(false, rcs.getKilled()); + assertEquals(containerReq, rcs.getStartRequest()); + + // Resume the container + stateStore.removeContainerPaused(containerId); + restartStateStore(); + recoveredContainers = stateStore.loadContainersState(); + assertEquals(1, recoveredContainers.size()); + // increase the container size, and verify recovered stateStore.storeContainerResourceChanged(containerId, 2, Resource.newInstance(2468, 4)); @@ -961,46 +979,12 @@ public void testUnexpectedKeyDoesntThrowException() throws IOException { .loadContainersState(); assertTrue(recoveredContainers.isEmpty()); - // create a container request ApplicationId appId = ApplicationId.newInstance(1234, 3); ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 4); ContainerId containerId = ContainerId.newContainerId(appAttemptId, 5); - LocalResource lrsrc = LocalResource.newInstance( - URL.newInstance("hdfs", "somehost", 12345, "/some/path/to/rsrc"), - LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, 123L, - 1234567890L); - Map localResources = - new HashMap(); - localResources.put("rsrc", lrsrc); - Map env = new HashMap(); - env.put("somevar", "someval"); - List containerCmds = new ArrayList(); - containerCmds.add("somecmd"); - containerCmds.add("somearg"); - Map serviceData = new HashMap(); - serviceData.put("someservice", - ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3 })); - ByteBuffer containerTokens = ByteBuffer - .wrap(new byte[] { 0x7, 0x8, 0x9, 0xa }); - Map acls = - new HashMap(); - acls.put(ApplicationAccessType.VIEW_APP, "viewuser"); - acls.put(ApplicationAccessType.MODIFY_APP, "moduser"); - ContainerLaunchContext clc = ContainerLaunchContext.newInstance( - localResources, env, containerCmds, - serviceData, containerTokens, acls); - Resource containerRsrc = Resource.newInstance(1357, 3); - ContainerTokenIdentifier containerTokenId = new ContainerTokenIdentifier( - containerId, "host", "user", containerRsrc, 9876543210L, 42, 2468, - Priority.newInstance(7), 13579); - Token containerToken = Token.newInstance(containerTokenId.getBytes(), - ContainerTokenIdentifier.KIND.toString(), "password".getBytes(), - "tokenservice"); - StartContainerRequest containerReq = StartContainerRequest.newInstance(clc, - containerToken); - - stateStore.storeContainer(containerId, 0, 0, containerReq); + StartContainerRequest startContainerRequest = 
storeMockContainer( + containerId); // add a invalid key byte[] invalidKey = ("ContainerManager/containers/" @@ -1013,7 +997,7 @@ public void testUnexpectedKeyDoesntThrowException() throws IOException { assertEquals(RecoveredContainerStatus.REQUESTED, rcs.getStatus()); assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode()); assertEquals(false, rcs.getKilled()); - assertEquals(containerReq, rcs.getStartRequest()); + assertEquals(startContainerRequest, rcs.getStartRequest()); assertTrue(rcs.getDiagnostics().isEmpty()); assertEquals(RecoveredContainerType.KILL, rcs.getRecoveryType()); // assert unknown keys are cleaned up finally @@ -1121,6 +1105,86 @@ public void testAMRMProxyStorage() throws IOException { } } + @Test + public void testStateStoreForResourceMapping() throws IOException { + // test empty when no state + List recoveredContainers = stateStore + .loadContainersState(); + assertTrue(recoveredContainers.isEmpty()); + + ApplicationId appId = ApplicationId.newInstance(1234, 3); + ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, + 4); + ContainerId containerId = ContainerId.newContainerId(appAttemptId, 5); + storeMockContainer(containerId); + + // Store ResourceMapping + stateStore.storeAssignedResources(containerId, "gpu", + Arrays.asList("1", "2", "3")); + // This will overwrite above + List gpuRes1 = Arrays.asList("1", "2", "4"); + stateStore.storeAssignedResources(containerId, "gpu", gpuRes1); + List fpgaRes = Arrays.asList("3", "4", "5", "6"); + stateStore.storeAssignedResources(containerId, "fpga", fpgaRes); + List numaRes = Arrays.asList("numa1"); + stateStore.storeAssignedResources(containerId, "numa", numaRes); + + // add a invalid key + restartStateStore(); + recoveredContainers = stateStore.loadContainersState(); + assertEquals(1, recoveredContainers.size()); + RecoveredContainerState rcs = recoveredContainers.get(0); + List res = rcs.getResourceMappings() + .getAssignedResources("gpu"); + Assert.assertTrue(res.equals(gpuRes1)); + + res = rcs.getResourceMappings().getAssignedResources("fpga"); + Assert.assertTrue(res.equals(fpgaRes)); + + res = rcs.getResourceMappings().getAssignedResources("numa"); + Assert.assertTrue(res.equals(numaRes)); + } + + private StartContainerRequest storeMockContainer(ContainerId containerId) + throws IOException { + // create a container request + LocalResource lrsrc = LocalResource.newInstance( + URL.newInstance("hdfs", "somehost", 12345, "/some/path/to/rsrc"), + LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, 123L, + 1234567890L); + Map localResources = + new HashMap(); + localResources.put("rsrc", lrsrc); + Map env = new HashMap(); + env.put("somevar", "someval"); + List containerCmds = new ArrayList(); + containerCmds.add("somecmd"); + containerCmds.add("somearg"); + Map serviceData = new HashMap(); + serviceData.put("someservice", + ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3 })); + ByteBuffer containerTokens = ByteBuffer + .wrap(new byte[] { 0x7, 0x8, 0x9, 0xa }); + Map acls = + new HashMap(); + acls.put(ApplicationAccessType.VIEW_APP, "viewuser"); + acls.put(ApplicationAccessType.MODIFY_APP, "moduser"); + ContainerLaunchContext clc = ContainerLaunchContext.newInstance( + localResources, env, containerCmds, + serviceData, containerTokens, acls); + Resource containerRsrc = Resource.newInstance(1357, 3); + ContainerTokenIdentifier containerTokenId = new ContainerTokenIdentifier( + containerId, "host", "user", containerRsrc, 9876543210L, 42, 2468, + Priority.newInstance(7), 13579); + Token 
containerToken = Token.newInstance(containerTokenId.getBytes(), + ContainerTokenIdentifier.KIND.toString(), "password".getBytes(), + "tokenservice"); + StartContainerRequest containerReq = StartContainerRequest.newInstance(clc, + containerToken); + stateStore.storeContainer(containerId, 0, 0, containerReq); + return containerReq; + } + private static class NMTokenSecretManagerForTest extends BaseNMTokenSecretManager { public MasterKey generateKey() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java index 4add586bbf1..767c308aeb6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java @@ -172,7 +172,7 @@ public void testGetContainerMemoryMB() throws Exception { YarnConfiguration conf = new YarnConfiguration(); conf.setBoolean(YarnConfiguration.NM_ENABLE_HARDWARE_CAPABILITY_DETECTION, true); - int mem = NodeManagerHardwareUtils.getContainerMemoryMB(null, conf); + long mem = NodeManagerHardwareUtils.getContainerMemoryMB(null, conf); Assert.assertEquals(YarnConfiguration.DEFAULT_NM_PMEM_MB, mem); mem = NodeManagerHardwareUtils.getContainerMemoryMB(plugin, conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index 57bee8c665e..77ebd347aab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceSet; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -239,4 +240,14 @@ public boolean isRecovering() { public long getContainerStartTime() { return 0; } + + @Override + public ResourceMappings getResourceMappings() { + return null; + } + + @Override + public void sendPauseEvent(String description) { + + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java index 16411715c55..fbab34a700a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java @@ -51,7 +51,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.logaggregation.ContainerLogAggregationType; -import org.apache.hadoop.yarn.logaggregation.PerContainerLogFileInfo; +import org.apache.hadoop.yarn.logaggregation.ContainerLogFileInfo; import org.apache.hadoop.yarn.logaggregation.TestContainerLogsUtils; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; @@ -537,7 +537,7 @@ private void testContainerLogs(WebResource r, ContainerId containerId) assertTrue(responseList.size() == 1); assertEquals(responseList.get(0).getLogType(), ContainerLogAggregationType.LOCAL.toString()); - List logMeta = responseList.get(0) + List logMeta = responseList.get(0) .getContainerLogsInfo(); assertTrue(logMeta.size() == 1); assertEquals(logMeta.get(0).getFileName(), filename); @@ -564,13 +564,13 @@ private void testContainerLogs(WebResource r, ContainerId containerId) for (ContainerLogsInfo logInfo : responseList) { if(logInfo.getLogType().equals( ContainerLogAggregationType.AGGREGATED.toString())) { - List meta = logInfo.getContainerLogsInfo(); + List meta = logInfo.getContainerLogsInfo(); assertTrue(meta.size() == 1); assertEquals(meta.get(0).getFileName(), aggregatedLogFile); } else { assertEquals(logInfo.getLogType(), ContainerLogAggregationType.LOCAL.toString()); - List meta = logInfo.getContainerLogsInfo(); + List meta = logInfo.getContainerLogsInfo(); assertTrue(meta.size() == 1); assertEquals(meta.get(0).getFileName(), filename); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml index 785ed0453b1..aca6c7cf529 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml @@ -106,6 +106,41 @@ + + yarn.scheduler.capacity.root.default.maximum-application-lifetime + + -1 + + Maximum lifetime of an application which is submitted to a queue + in seconds. Any value less than or equal to zero will be considered as + disabled. + This will be a hard time limit for all applications in this + queue. If positive value is configured then any application submitted + to this queue will be killed after exceeds the configured lifetime. + User can also specify lifetime per application basis in + application submission context. But user lifetime will be + overridden if it exceeds queue maximum lifetime. It is point-in-time + configuration. + Note : Configuring too low value will result in killing application + sooner. This feature is applicable only for leaf queue. 
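The lifetime description above notes that a user can also request a lifetime per application in the application submission context, and that the RM overrides it when it exceeds the queue maximum. A minimal client-side sketch of such a request (illustrative only, not part of this patch; it assumes an ApplicationSubmissionContext prepared through the usual YarnClient submission path):

    import java.util.Collections;
    import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
    import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;

    public final class LifetimeRequestExample {
      // Request a 30-minute (1800 second) lifetime for the application; if the
      // queue's maximum-application-lifetime is smaller, the RM caps the value.
      public static void requestLifetime(ApplicationSubmissionContext ctx) {
        ctx.setApplicationTimeouts(
            Collections.singletonMap(ApplicationTimeoutType.LIFETIME, 1800L));
      }
    }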
+ + + + + yarn.scheduler.capacity.root.default.default-application-lifetime + + -1 + + Default lifetime of an application which is submitted to a queue + in seconds. Any value less than or equal to zero will be considered as + disabled. + If the user has not submitted application with lifetime value then this + value will be taken. It is point-in-time configuration. + Note : Default lifetime can't exceed maximum lifetime. This feature is + applicable only for leaf queue. + + + yarn.scheduler.capacity.node-locality-delay 40 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml index ba17b519f59..25a201ca4de 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml @@ -351,6 +351,11 @@ src/test/resources/delete-reservation.json src/test/resources/update-reservation.json src/test/resources/invariants.txt + src/test/resources/profiles/sample-profiles-1.json + src/test/resources/profiles/sample-profiles-2.json + src/test/resources/profiles/illegal-profiles-1.json + src/test/resources/profiles/illegal-profiles-2.json + src/test/resources/profiles/illegal-profiles-3.json diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AMSProcessingChain.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AMSProcessingChain.java index 931b1c8b7d5..7ae23e7bb63 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AMSProcessingChain.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AMSProcessingChain.java @@ -82,7 +82,7 @@ public synchronized void addProcessor( public void registerApplicationMaster( ApplicationAttemptId applicationAttemptId, RegisterApplicationMasterRequest request, - RegisterApplicationMasterResponse resp) throws IOException { + RegisterApplicationMasterResponse resp) throws IOException, YarnException { this.head.registerApplicationMaster(applicationAttemptId, request, resp); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index e6c25adefac..a0c3db622b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -112,6 +112,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityResponse; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsResponse; +import 
org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; @@ -145,6 +151,7 @@ import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager; import org.apache.hadoop.yarn.server.resourcemanager.reservation.Plan; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationAllocation; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInputValidator; @@ -175,6 +182,7 @@ import org.apache.hadoop.yarn.util.UTCClock; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.timeline.TimelineUtils; @@ -207,9 +215,13 @@ public class ClientRMService extends AbstractService implements private ReservationSystem reservationSystem; private ReservationInputValidator rValidator; + private boolean displayPerUserApps = false; + private static final EnumSet ACTIVE_APP_STATES = EnumSet.of( RMAppState.ACCEPTED, RMAppState.RUNNING); + private ResourceProfilesManager resourceProfilesManager; + public ClientRMService(RMContext rmContext, YarnScheduler scheduler, RMAppManager rmAppManager, ApplicationACLsManager applicationACLsManager, QueueACLsManager queueACLsManager, @@ -232,6 +244,7 @@ public ClientRMService(RMContext rmContext, YarnScheduler scheduler, this.reservationSystem = rmContext.getReservationSystem(); this.clock = clock; this.rValidator = new ReservationInputValidator(clock); + resourceProfilesManager = rmContext.getResourceProfilesManager(); } @Override @@ -264,7 +277,11 @@ protected void serviceStart() throws Exception { } refreshServiceAcls(conf, RMPolicyProvider.getInstance()); } - + + this.displayPerUserApps = conf.getBoolean( + YarnConfiguration.DISPLAY_APPS_FOR_LOGGED_IN_USER, + YarnConfiguration.DEFAULT_DISPLAY_APPS_FOR_LOGGED_IN_USER); + this.server.start(); clientBindAddress = conf.updateConnectAddr(YarnConfiguration.RM_BIND_HOST, YarnConfiguration.RM_ADDRESS, @@ -898,6 +915,12 @@ public void remove() { continue; } + // Given RM is configured to display apps per user, skip apps to which + // this caller doesn't have access to view. 
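// Hedged note, not part of the patch: the check below only takes effect when
// displayPerUserApps is true, which serviceStart() above reads from
// YarnConfiguration.DISPLAY_APPS_FOR_LOGGED_IN_USER. A minimal way to turn the
// per-user filtering on, assuming a standard RM configuration object:
//
//   conf.setBoolean(YarnConfiguration.DISPLAY_APPS_FOR_LOGGED_IN_USER, true);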
+ if (displayPerUserApps && !allowAccess) { + continue; + } + reports.add(application.createAndGetApplicationReport( callerUGI.getUserName(), allowAccess)); } @@ -1691,6 +1714,7 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( RMAuditLogger.logSuccess(callerUGI.getShortUserName(), AuditConstants.UPDATE_APP_TIMEOUTS, "ClientRMService", applicationId); + response.setApplicationTimeouts(applicationTimeouts); return response; } String msg = @@ -1702,7 +1726,8 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( } try { - rmAppManager.updateApplicationTimeout(application, applicationTimeouts); + applicationTimeouts = rmAppManager.updateApplicationTimeout(application, + applicationTimeouts); } catch (YarnException ex) { RMAuditLogger.logFailure(callerUGI.getShortUserName(), AuditConstants.UPDATE_APP_TIMEOUTS, "UNKNOWN", "ClientRMService", @@ -1712,6 +1737,7 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( RMAuditLogger.logSuccess(callerUGI.getShortUserName(), AuditConstants.UPDATE_APP_TIMEOUTS, "ClientRMService", applicationId); + response.setApplicationTimeouts(applicationTimeouts); return response; } @@ -1763,4 +1789,36 @@ private RMApp verifyUserAccessForRMApp(ApplicationId applicationId, return application; } + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException { + GetAllResourceProfilesResponse response = + GetAllResourceProfilesResponse.newInstance(); + response.setResourceProfiles(resourceProfilesManager.getResourceProfiles()); + return response; + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + GetResourceProfileResponse response = + GetResourceProfileResponse.newInstance(); + response.setResource( + resourceProfilesManager.getProfile(request.getProfileName())); + return response; + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + GetAllResourceTypeInfoResponse response = + GetAllResourceTypeInfoResponse.newInstance(); + response.setResourceTypeInfo(ResourceUtils.getResourcesTypeInfo()); + return response; + } + + @VisibleForTesting + public void setDisplayPerUserApps(boolean displayPerUserApps) { + this.displayPerUserApps = displayPerUserApps; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DefaultAMSProcessor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DefaultAMSProcessor.java index a993d694e28..5632efecaa0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DefaultAMSProcessor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/DefaultAMSProcessor.java @@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; 
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt @@ -100,18 +101,21 @@ final class DefaultAMSProcessor implements ApplicationMasterServiceProcessor { RecordFactoryProvider.getRecordFactory(null); private RMContext rmContext; + private ResourceProfilesManager resourceProfilesManager; @Override public void init(ApplicationMasterServiceContext amsContext, ApplicationMasterServiceProcessor nextProcessor) { this.rmContext = (RMContext)amsContext; + this.resourceProfilesManager = rmContext.getResourceProfilesManager(); } @Override public void registerApplicationMaster( ApplicationAttemptId applicationAttemptId, RegisterApplicationMasterRequest request, - RegisterApplicationMasterResponse response) throws IOException { + RegisterApplicationMasterResponse response) + throws IOException, YarnException { RMApp app = getRmContext().getRMApps().get( applicationAttemptId.getApplicationId()); @@ -171,6 +175,12 @@ public void registerApplicationMaster( response.setSchedulerResourceTypes(getScheduler() .getSchedulingResourceTypes()); + if (getRmContext().getYarnConfiguration().getBoolean( + YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, + YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_ENABLED)) { + response.setResourceProfiles( + resourceProfilesManager.getResourceProfiles()); + } } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java index 4fc2916fe0c..c3ed7d51925 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/OpportunisticContainerAllocatorAMService.java @@ -127,7 +127,8 @@ public void init(ApplicationMasterServiceContext amsContext, public void registerApplicationMaster( ApplicationAttemptId applicationAttemptId, RegisterApplicationMasterRequest request, - RegisterApplicationMasterResponse response) throws IOException { + RegisterApplicationMasterResponse response) + throws IOException, YarnException { SchedulerApplicationAttempt appAttempt = ((AbstractYarnScheduler) getScheduler()).getApplicationAttempt(applicationAttemptId); if (appAttempt.getOpportunisticContainerContext() == null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index bcd1a9c92d7..d0425907f6a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -66,9 +66,11 @@ import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.Times; import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.SettableFuture; +import org.apache.hadoop.yarn.util.StringHelper; /** * This class manages the list of applications for the resource manager. @@ -188,7 +190,12 @@ public static SummaryBuilder createAppSummary(RMApp app) { .add("preemptedAMContainers", metrics.getNumAMContainersPreempted()) .add("preemptedNonAMContainers", metrics.getNumNonAMContainersPreempted()) .add("preemptedResources", metrics.getResourcePreempted()) - .add("applicationType", app.getApplicationType()); + .add("applicationType", app.getApplicationType()) + .add("resourceSeconds", StringHelper + .getResourceSecondsString(metrics.getResourceSecondsMap())) + .add("preemptedResourceSeconds", StringHelper + .getResourceSecondsString( + metrics.getPreemptedResourceSecondsMap())); return summary; } @@ -571,18 +578,41 @@ public void handle(RMAppManagerEvent event) { } // transaction method. - public void updateApplicationTimeout(RMApp app, + public Map updateApplicationTimeout(RMApp app, Map newTimeoutInISO8601Format) throws YarnException { ApplicationId applicationId = app.getApplicationId(); synchronized (applicationId) { if (app.isAppInCompletedStates()) { - return; + return newTimeoutInISO8601Format; } Map newExpireTime = RMServerUtils .validateISO8601AndConvertToLocalTimeEpoch(newTimeoutInISO8601Format); + // validation is only for lifetime + Long updatedlifetimeInMillis = + newExpireTime.get(ApplicationTimeoutType.LIFETIME); + if (updatedlifetimeInMillis != null) { + long queueMaxLifetimeInSec = + scheduler.getMaximumApplicationLifetime(app.getQueue()); + + if (queueMaxLifetimeInSec > 0) { + if (updatedlifetimeInMillis > (app.getSubmitTime() + + queueMaxLifetimeInSec * 1000)) { + updatedlifetimeInMillis = + app.getSubmitTime() + queueMaxLifetimeInSec * 1000; + // cut off to maximum queue lifetime if update lifetime is exceeding + // queue lifetime. 
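// Illustrative comment only (not in the original patch): with, say,
// submitTime = 1,000,000 ms and queueMaxLifetimeInSec = 3600, any requested
// LIFETIME that would expire after 1,000,000 + 3600 * 1000 = 4,600,000 ms is
// clamped to 4,600,000 ms, and the ISO8601 timeout value returned to the
// client below is rewritten to that clamped time.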
+ newExpireTime.put(ApplicationTimeoutType.LIFETIME, + updatedlifetimeInMillis); + + newTimeoutInISO8601Format.put(ApplicationTimeoutType.LIFETIME, + Times.formatISO8601(updatedlifetimeInMillis.longValue())); + } + } + } + SettableFuture future = SettableFuture.create(); Map currentExpireTimeouts = @@ -605,6 +635,8 @@ public void updateApplicationTimeout(RMApp app, // update in-memory ((RMAppImpl) app).updateApplicationTimeout(newExpireTime); + + return newTimeoutInISO8601Format; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index 0ea9516567d..6df31354d3e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem; +import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.monitor.RMAppLifetimeMonitor; @@ -159,4 +160,8 @@ void setRMDelegatedNodeLabelsUpdater( String getHAZookeeperConnectionState(); ResourceManager getResourceManager(); + + ResourceProfilesManager getResourceProfilesManager(); + + void setResourceProfilesManager(ResourceProfilesManager mgr); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index db2c585c031..921e5d5fe77 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem; +import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.monitor.RMAppLifetimeMonitor; @@ -82,6 +83,8 @@ public class RMContextImpl implements RMContext { */ private RMActiveServiceContext activeServiceContext; + private ResourceProfilesManager resourceProfilesManager; + /** 
* Default constructor. To be used in conjunction with setter methods for * individual fields. @@ -551,5 +554,14 @@ public RMAppLifetimeMonitor getRMAppLifetimeMonitor() { return this.activeServiceContext.getRMAppLifetimeMonitor(); } + @Override + public ResourceProfilesManager getResourceProfilesManager() { + return this.resourceProfilesManager; + } + + @Override + public void setResourceProfilesManager(ResourceProfilesManager mgr) { + this.resourceProfilesManager = mgr; + } // Note: Read java doc before adding any services over here. } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java index 35b0c983fac..49342430eb2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java @@ -32,6 +32,7 @@ import com.google.common.collect.Sets; import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; @@ -46,6 +47,7 @@ import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.ProfileCapability; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; @@ -65,6 +67,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.YarnAuthorizationProvider; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt .RMAppAttemptState; @@ -88,6 +91,8 @@ */ public class RMServerUtils { + private static final Log LOG_HANDLE = LogFactory.getLog(RMServerUtils.class); + public static final String UPDATE_OUTSTANDING_ERROR = "UPDATE_OUTSTANDING_ERROR"; private static final String INCORRECT_CONTAINER_VERSION_ERROR = @@ -295,8 +300,7 @@ public static void checkSchedContainerChangeRequest( // Target resource of the increase request is more than NM can offer ResourceScheduler scheduler = rmContext.getScheduler(); RMNode rmNode = request.getSchedulerNode().getRMNode(); - if (!Resources.fitsIn(scheduler.getResourceCalculator(), - scheduler.getClusterResource(), targetResource, + if (!Resources.fitsIn(scheduler.getResourceCalculator(), targetResource, rmNode.getTotalCapability())) { String msg = "Target resource=" + targetResource + " of containerId=" + containerId + " is more than node's total resource=" @@ -478,7 +482,7 @@ public static YarnApplicationAttemptState createApplicationAttemptState( DUMMY_APPLICATION_RESOURCE_USAGE_REPORT = BuilderUtils.newApplicationResourceUsageReport(-1, -1, 
Resources.createResource(-1, -1), Resources.createResource(-1, -1), - Resources.createResource(-1, -1), 0, 0, 0, 0); + Resources.createResource(-1, -1), new HashMap<>(), new HashMap<>()); /** @@ -622,4 +626,43 @@ private static Set getNodeIdsForLabel(RMContext rmContext, return labelsToNodes.get(label); } } + + public static void convertProfileToResourceCapability(ResourceRequest ask, + Configuration conf, ResourceProfilesManager resourceProfilesManager) + throws YarnException { + + if (LOG_HANDLE.isDebugEnabled()) { + LOG_HANDLE + .debug("Converting profile to resource capability for ask " + ask); + } + + boolean profilesEnabled = + conf.getBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, + YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_ENABLED); + if (!profilesEnabled) { + if (ask.getProfileCapability() != null && !ask.getProfileCapability() + .getProfileCapabilityOverride().equals(Resources.none())) { + ask.setCapability( + ask.getProfileCapability().getProfileCapabilityOverride()); + } + } else { + if (ask.getProfileCapability() != null) { + ask.setCapability(ProfileCapability + .toResource(ask.getProfileCapability(), + resourceProfilesManager.getResourceProfiles())); + } + } + if (LOG_HANDLE.isDebugEnabled()) { + LOG_HANDLE + .debug("Converted profile to resource capability for ask " + ask); + } + } + + public static Long getOrDefault(Map map, String key, + Long defaultValue) { + if (map.containsKey(key)) { + return map.get(key); + } + return defaultValue; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 5333f254328..e53a42c245c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -78,6 +78,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.reservation.AbstractReservationSystem; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem; +import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager; +import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManagerImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; @@ -231,6 +233,13 @@ protected void serviceInit(Configuration conf) throws Exception { this.rmContext = new RMContextImpl(); rmContext.setResourceManager(this); + + // add resource profiles here because it's used by AbstractYarnScheduler + ResourceProfilesManager resourceProfilesManager = + new ResourceProfilesManagerImpl(); + resourceProfilesManager.init(conf); + rmContext.setResourceProfilesManager(resourceProfilesManager); + this.configurationProvider = ConfigurationProviderFactory.getConfigurationProvider(conf); this.configurationProvider.init(this.conf); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index cc47e02cb19..a42d0533c52 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -376,10 +376,11 @@ public RegisterNodeManagerResponse registerNodeManager( // Check if this node has minimum allocations if (capability.getMemorySize() < minAllocMb || capability.getVirtualCores() < minAllocVcores) { - String message = - "NodeManager from " + host - + " doesn't satisfy minimum allocations, Sending SHUTDOWN" - + " signal to the NodeManager."; + String message = "NodeManager from " + host + + " doesn't satisfy minimum allocations, Sending SHUTDOWN" + + " signal to the NodeManager. Node capabilities are " + capability + + "; minimums are " + minAllocMb + "mb and " + minAllocVcores + + " vcores"; LOG.info(message); response.setDiagnosticsMessage(message); response.setNodeAction(NodeAction.SHUTDOWN); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java index 0ae3ef01340..f097e9c6291 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java @@ -156,8 +156,8 @@ public static boolean tryPreemptContainerAndDeductResToObtain( if (null != toObtainByPartition && Resources.greaterThan(rc, clusterResource, toObtainByPartition, Resources.none()) - && Resources.fitsIn(rc, clusterResource, - rmContainer.getAllocatedResource(), totalPreemptionAllowed) + && Resources.fitsIn(rc, rmContainer.getAllocatedResource(), + totalPreemptionAllowed) && !Resources.isAnyMajorResourceZero(rc, toObtainByPartition)) { Resources.subtractFrom(toObtainByPartition, rmContainer.getAllocatedResource()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/QueuePriorityContainerCandidateSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/QueuePriorityContainerCandidateSelector.java index c730a2d88ea..7b7404caf3d 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/QueuePriorityContainerCandidateSelector.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/QueuePriorityContainerCandidateSelector.java @@ -229,8 +229,7 @@ private boolean canPreemptEnoughResourceForAsked(Resource requiredResource, // If we already can allocate the reserved container after preemption, // skip following steps - if (Resources.fitsIn(rc, clusterResource, lacking, - Resources.none())) { + if (Resources.fitsIn(rc, lacking, Resources.none())) { return true; } @@ -270,7 +269,7 @@ private boolean canPreemptEnoughResourceForAsked(Resource requiredResource, } // Lacking <= 0 means we can allocate the reserved container - if (Resources.fitsIn(rc, clusterResource, lacking, Resources.none())) { + if (Resources.fitsIn(rc, lacking, Resources.none())) { return true; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java index de23d0a291c..ff100d9a6ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java @@ -87,8 +87,8 @@ public Map> selectCandidates( // Get list of nodes for preemption, ordered by preemption cost List nodesForPreemption = getNodesForPreemption( - clusterResource, queueToPreemptableResourceByPartition, - selectedCandidates, totalPreemptedResourceAllowed); + queueToPreemptableResourceByPartition, selectedCandidates, + totalPreemptedResourceAllowed); for (NodeForPreemption nfp : nodesForPreemption) { RMContainer reservedContainer = nfp.schedulerNode.getReservedContainer(); @@ -97,9 +97,8 @@ public Map> selectCandidates( } NodeForPreemption preemptionResult = getPreemptionCandidatesOnNode( - nfp.schedulerNode, clusterResource, - queueToPreemptableResourceByPartition, selectedCandidates, - totalPreemptedResourceAllowed, false); + nfp.schedulerNode, queueToPreemptableResourceByPartition, + selectedCandidates, totalPreemptedResourceAllowed, false); if (null != preemptionResult) { for (RMContainer c : preemptionResult.selectedContainers) { ApplicationAttemptId appId = c.getApplicationAttemptId(); @@ -135,8 +134,7 @@ private Resource getPreemptableResource(String queueName, return preemptable; } - private boolean tryToPreemptFromQueue(Resource cluster, String queueName, - String partitionName, + private boolean tryToPreemptFromQueue(String queueName, String partitionName, Map> queueToPreemptableResourceByPartition, Resource required, Resource totalPreemptionAllowed, boolean readOnly) { Resource preemptable = getPreemptableResource(queueName, partitionName, @@ -145,11 +143,11 @@ private boolean tryToPreemptFromQueue(Resource cluster, String queueName, return false; } - 
if (!Resources.fitsIn(rc, cluster, required, preemptable)) { + if (!Resources.fitsIn(rc, required, preemptable)) { return false; } - if (!Resources.fitsIn(rc, cluster, required, totalPreemptionAllowed)) { + if (!Resources.fitsIn(rc, required, totalPreemptionAllowed)) { return false; } @@ -165,7 +163,6 @@ private boolean tryToPreemptFromQueue(Resource cluster, String queueName, /** * Try to check if we can preempt resources for reserved container in given node * @param node - * @param cluster * @param queueToPreemptableResourceByPartition it's a map of * > * @param readOnly do we want to modify preemptable resource after we selected @@ -174,7 +171,7 @@ private boolean tryToPreemptFromQueue(Resource cluster, String queueName, * to satisfy reserved resource */ private NodeForPreemption getPreemptionCandidatesOnNode( - FiCaSchedulerNode node, Resource cluster, + FiCaSchedulerNode node, Map> queueToPreemptableResourceByPartition, Map> selectedCandidates, Resource totalPreemptionAllowed, boolean readOnly) { @@ -204,8 +201,7 @@ private NodeForPreemption getPreemptionCandidatesOnNode( String partition = node.getPartition(); // Avoid preempt any container if required <= available + killable - if (Resources.fitsIn(rc, cluster, reservedContainer.getReservedResource(), - cur)) { + if (Resources.fitsIn(rc, reservedContainer.getReservedResource(), cur)) { return null; } @@ -232,9 +228,9 @@ private NodeForPreemption getPreemptionCandidatesOnNode( // Can we preempt container c? // Check if we have quota to preempt this container - boolean canPreempt = tryToPreemptFromQueue(cluster, containerQueueName, - partition, queueToPreemptableResourceByPartition, - c.getAllocatedResource(), totalPreemptionAllowed, readOnly); + boolean canPreempt = tryToPreemptFromQueue(containerQueueName, partition, + queueToPreemptableResourceByPartition, c.getAllocatedResource(), + totalPreemptionAllowed, readOnly); // If we can, add to selected container, and change resource accordingly. 
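For readers skimming these preemption hunks: the repeated Resources.fitsIn(rc, required, available) calls are point-wise comparisons of two resource vectors (the removed clusterResource argument was unused by the comparison). Below is a minimal, self-contained sketch of that kind of check using a plain map representation rather than the real Resources/ResourceCalculator API; the class, method, and resource names are invented for illustration only.

import java.util.HashMap;
import java.util.Map;

// Illustrative only: a point-wise "fits in" check between two resource
// vectors, analogous in spirit to Resources.fitsIn(rc, required, available).
public final class FitsInSketch {
  // Returns true if every named value in 'required' is <= the value in 'available'.
  static boolean fitsIn(Map<String, Long> required, Map<String, Long> available) {
    for (Map.Entry<String, Long> e : required.entrySet()) {
      long avail = available.getOrDefault(e.getKey(), 0L);
      if (e.getValue() > avail) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    Map<String, Long> required = new HashMap<>();
    required.put("memory-mb", 2048L);
    required.put("vcores", 2L);
    Map<String, Long> available = new HashMap<>();
    available.put("memory-mb", 4096L);
    available.put("vcores", 1L);
    System.out.println(fitsIn(required, available)); // false: not enough vcores
  }
}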
if (canPreempt) { @@ -246,7 +242,7 @@ private NodeForPreemption getPreemptionCandidatesOnNode( Resources.addTo(totalSelected, c.getAllocatedResource()); } Resources.addTo(cur, c.getAllocatedResource()); - if (Resources.fitsIn(rc, cluster, + if (Resources.fitsIn(rc, reservedContainer.getReservedResource(), cur)) { canAllocateReservedContainer = true; break; @@ -282,7 +278,7 @@ private NodeForPreemption getPreemptionCandidatesOnNode( return nfp; } - private List getNodesForPreemption(Resource cluster, + private List getNodesForPreemption( Map> queueToPreemptableResourceByPartition, Map> selectedCandidates, Resource totalPreemptionAllowed) { @@ -292,7 +288,7 @@ private List getNodesForPreemption(Resource cluster, for (FiCaSchedulerNode node : preemptionContext.getScheduler() .getAllNodes()) { if (node.getReservedContainer() != null) { - NodeForPreemption nfp = getPreemptionCandidatesOnNode(node, cluster, + NodeForPreemption nfp = getPreemptionCandidatesOnNode(node, queueToPreemptableResourceByPartition, selectedCandidates, totalPreemptionAllowed, true); if (null != nfp) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index 35340e62a22..00ef39fdd98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -851,11 +851,8 @@ public void storeNewApplicationAttempt(RMAppAttempt appAttempt) { appAttempt.getAppAttemptId(), appAttempt.getMasterContainer(), credentials, appAttempt.getStartTime(), - resUsage.getMemorySeconds(), - resUsage.getVcoreSeconds(), - attempMetrics.getPreemptedMemory(), - attempMetrics.getPreemptedVcore() - ); + resUsage.getResourceUsageSecondsMap(), + attempMetrics.getPreemptedResourceSecondsMap()); getRMStateStoreEventHandler().handle( new RMStateStoreAppAttemptEvent(attemptState)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java index 67aaf947127..2de071ad2ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java @@ -25,23 +25,28 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import 
org.apache.hadoop.yarn.proto.YarnServerResourceManagerRecoveryProtos.ApplicationAttemptStateDataProto; +import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.util.Records; +import java.util.Map; + /* * Contains the state data that needs to be persisted for an ApplicationAttempt */ @Public @Unstable public abstract class ApplicationAttemptStateData { + public static ApplicationAttemptStateData newInstance( ApplicationAttemptId attemptId, Container container, Credentials attemptTokens, long startTime, RMAppAttemptState finalState, String finalTrackingUrl, String diagnostics, FinalApplicationStatus amUnregisteredFinalStatus, int exitStatus, - long finishTime, long memorySeconds, long vcoreSeconds, - long preemptedMemorySeconds, long preemptedVcoreSeconds) { + long finishTime, Map resourceSecondsMap, + Map preemptedResourceSecondsMap) { ApplicationAttemptStateData attemptStateData = Records.newRecord(ApplicationAttemptStateData.class); attemptStateData.setAttemptId(attemptId); @@ -54,23 +59,33 @@ public static ApplicationAttemptStateData newInstance( attemptStateData.setFinalApplicationStatus(amUnregisteredFinalStatus); attemptStateData.setAMContainerExitStatus(exitStatus); attemptStateData.setFinishTime(finishTime); - attemptStateData.setMemorySeconds(memorySeconds); - attemptStateData.setVcoreSeconds(vcoreSeconds); - attemptStateData.setPreemptedMemorySeconds(preemptedMemorySeconds); - attemptStateData.setPreemptedVcoreSeconds(preemptedVcoreSeconds); + attemptStateData.setMemorySeconds(RMServerUtils + .getOrDefault(resourceSecondsMap, + ResourceInformation.MEMORY_MB.getName(), 0L)); + attemptStateData.setVcoreSeconds(RMServerUtils + .getOrDefault(resourceSecondsMap, ResourceInformation.VCORES.getName(), + 0L)); + attemptStateData.setPreemptedMemorySeconds(RMServerUtils + .getOrDefault(preemptedResourceSecondsMap, + ResourceInformation.MEMORY_MB.getName(), 0L)); + attemptStateData.setPreemptedVcoreSeconds(RMServerUtils + .getOrDefault(preemptedResourceSecondsMap, + ResourceInformation.VCORES.getName(), 0L)); + attemptStateData.setResourceSecondsMap(resourceSecondsMap); + attemptStateData + .setPreemptedResourceSecondsMap(preemptedResourceSecondsMap); return attemptStateData; } public static ApplicationAttemptStateData newInstance( ApplicationAttemptId attemptId, Container masterContainer, - Credentials attemptTokens, long startTime, long memorySeconds, - long vcoreSeconds, long preemptedMemorySeconds, - long preemptedVcoreSeconds) { - return newInstance(attemptId, masterContainer, attemptTokens, - startTime, null, "N/A", "", null, ContainerExitStatus.INVALID, 0, - memorySeconds, vcoreSeconds, - preemptedMemorySeconds, preemptedVcoreSeconds); - } + Credentials attemptTokens, long startTime, + Map resourceSecondsMap, + Map preemptedResourceSecondsMap) { + return newInstance(attemptId, masterContainer, attemptTokens, startTime, + null, "N/A", "", null, ContainerExitStatus.INVALID, 0, + resourceSecondsMap, preemptedResourceSecondsMap); + } public abstract ApplicationAttemptStateDataProto getProto(); @@ -215,4 +230,50 @@ public abstract void setFinalApplicationStatus( @Public @Unstable public abstract void setPreemptedVcoreSeconds(long vcoreSeconds); + + /** + * Get the aggregated number of resources that the application has + * allocated times the number of seconds the application has been running.
+ * + * @return map containing the resource name and aggregated + * resource-seconds + */ + @Public + @Unstable + public abstract Map getResourceSecondsMap(); + + /** + * Set the aggregated number of resources that the application has + * allocated times the number of seconds the application has been running. + * + * @param resourceSecondsMap map containing the resource name and aggregated + * resource-seconds + */ + @Public + @Unstable + public abstract void setResourceSecondsMap( + Map resourceSecondsMap); + + /** + * Get the aggregated number of resources preempted that the application has + * allocated times the number of seconds the application has been running. + * + * @return map containing the resource name and aggregated preempted + * resource-seconds + */ + @Public + @Unstable + public abstract Map getPreemptedResourceSecondsMap(); + + /** + * Set the aggregated number of resources preempted that the application has + * allocated times the number of seconds the application has been running. + * + * @param preemptedResourceSecondsMap map containing the resource name and + * aggregated preempted resource-seconds + */ + @Public + @Unstable + public abstract void setPreemptedResourceSecondsMap( + Map preemptedResourceSecondsMap); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java index e89726f91ad..ed71ea2f016 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -55,6 +56,9 @@ public class ApplicationAttemptStateDataPBImpl extends private Container masterContainer = null; private ByteBuffer appAttemptTokens = null; + private Map resourceSecondsMap; + private Map preemptedResourceSecondsMap; + public ApplicationAttemptStateDataPBImpl() { builder = ApplicationAttemptStateDataProto.newBuilder(); } @@ -404,4 +408,50 @@ private static Credentials convertCredentialsFromByteBuffer( IOUtils.closeStream(dibb); } } + + @Override + public Map getResourceSecondsMap() { + if (this.resourceSecondsMap != null) { + return this.resourceSecondsMap; + } + ApplicationAttemptStateDataProtoOrBuilder p = viaProto ?
proto : builder; + this.resourceSecondsMap = ProtoUtils.convertStringLongMapProtoListToMap( + p.getApplicationResourceUsageMapList()); + return this.resourceSecondsMap; + } + + @Override + public void setResourceSecondsMap(Map resourceSecondsMap) { + maybeInitBuilder(); + builder.clearApplicationResourceUsageMap(); + this.resourceSecondsMap = resourceSecondsMap; + if (resourceSecondsMap != null) { + builder.addAllApplicationResourceUsageMap( + ProtoUtils.convertMapToStringLongMapProtoList(resourceSecondsMap)); + } + } + + @Override + public Map getPreemptedResourceSecondsMap() { + if (this.preemptedResourceSecondsMap != null) { + return this.preemptedResourceSecondsMap; + } + ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder; + this.preemptedResourceSecondsMap = ProtoUtils + .convertStringLongMapProtoListToMap( + p.getPreemptedResourceUsageMapList()); + return this.preemptedResourceSecondsMap; + } + + @Override + public void setPreemptedResourceSecondsMap( + Map preemptedResourceSecondsMap) { + maybeInitBuilder(); + builder.clearPreemptedResourceUsageMap(); + this.preemptedResourceSecondsMap = preemptedResourceSecondsMap; + if (preemptedResourceSecondsMap != null) { + builder.addAllPreemptedResourceUsageMap(ProtoUtils + .convertMapToStringLongMapProtoList(preemptedResourceSecondsMap)); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacityOverTimePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacityOverTimePolicy.java index bb1a4e82789..acd577475ce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacityOverTimePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacityOverTimePolicy.java @@ -95,26 +95,29 @@ public void validate(Plan plan, ReservationAllocation reservation) throw new PlanningQuotaException(p); } + long checkStart = reservation.getStartTime() - validWindow; + long checkEnd = reservation.getEndTime() + validWindow; + //---- check for integral violations of capacity -------- // Gather a view of what to check (curr allocation of user, minus old // version of this reservation, plus new version) RLESparseResourceAllocation consumptionForUserOverTime = plan.getConsumptionForUserOverTime(reservation.getUser(), - reservation.getStartTime() - validWindow, - reservation.getEndTime() + validWindow); + checkStart, checkEnd); ReservationAllocation old = plan.getReservationById(reservation.getReservationId()); if (old != null) { - consumptionForUserOverTime = RLESparseResourceAllocation - .merge(plan.getResourceCalculator(), plan.getTotalCapacity(), - consumptionForUserOverTime, old.getResourcesOverTime(), - RLEOperator.add, reservation.getStartTime() - validWindow, - reservation.getEndTime() + validWindow); + consumptionForUserOverTime = + RLESparseResourceAllocation.merge(plan.getResourceCalculator(), + plan.getTotalCapacity(), consumptionForUserOverTime, + old.getResourcesOverTime(checkStart, checkEnd), RLEOperator.add, + checkStart, checkEnd); } - RLESparseResourceAllocation resRLE = reservation.getResourcesOverTime(); + RLESparseResourceAllocation resRLE = +
reservation.getResourcesOverTime(checkStart, checkEnd); RLESparseResourceAllocation toCheck = RLESparseResourceAllocation .merge(plan.getResourceCalculator(), plan.getTotalCapacity(), @@ -191,11 +194,11 @@ public void validate(Plan plan, ReservationAllocation reservation) // compare using merge() limit with integral try { - RLESparseResourceAllocation - .merge(plan.getResourceCalculator(), plan.getTotalCapacity(), - targetLimit, integral, RLEOperator.subtractTestNonNegative, - reservation.getStartTime() - validWindow, - reservation.getEndTime() + validWindow); + + RLESparseResourceAllocation.merge(plan.getResourceCalculator(), + plan.getTotalCapacity(), targetLimit, integral, + RLEOperator.subtractTestNonNegative, checkStart, checkEnd); + } catch (PlanningException p) { throw new PlanningQuotaException( "Integral (avg over time) quota capacity " + maxAvg @@ -240,7 +243,8 @@ public RLESparseResourceAllocation availableResources( if (old != null) { used = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), Resources.clone(plan.getTotalCapacity()), used, - old.getResourcesOverTime(), RLEOperator.subtract, start, end); + old.getResourcesOverTime(start, end), RLEOperator.subtract, start, + end); } instRLEQuota = RLESparseResourceAllocation diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/NoOverCommitPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/NoOverCommitPolicy.java index 49d470211d4..98ef5828760 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/NoOverCommitPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/NoOverCommitPolicy.java @@ -40,13 +40,17 @@ public void validate(Plan plan, ReservationAllocation reservation) RLESparseResourceAllocation available = plan.getAvailableResourceOverTime( reservation.getUser(), reservation.getReservationId(), - reservation.getStartTime(), reservation.getEndTime(), 0); + reservation.getStartTime(), reservation.getEndTime(), + reservation.getPeriodicity()); // test the reservation does not exceed what is available try { + + RLESparseResourceAllocation ask = reservation.getResourcesOverTime( + reservation.getStartTime(), reservation.getEndTime()); RLESparseResourceAllocation .merge(plan.getResourceCalculator(), plan.getTotalCapacity(), - available, reservation.getResourcesOverTime(), + available, ask, RLESparseResourceAllocation.RLEOperator.subtractTestNonNegative, reservation.getStartTime(), reservation.getEndTime()); } catch (PlanningException p) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManager.java new file mode 100644 index 00000000000..700f0ef3aa5 --- /dev/null +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManager.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.resource; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException; +import org.apache.hadoop.yarn.exceptions.YarnException; + +import java.io.IOException; +import java.util.Map; + +/** + * Interface for the resource profiles manager. Provides an interface to get + * the list of available profiles and some helper functions. + */ +public interface ResourceProfilesManager { + + /** + * Method to handle all initialization steps for ResourceProfilesManager. + * @param config Configuration object + * @throws IOException when invalid resource profile names are loaded + */ + void init(Configuration config) throws IOException; + + /** + * Get the resource capability associated with given profile name. + * @param profile name of resource profile + * @return resource capability for given profile + * + * @throws YarnException when any invalid profile name or feature is disabled + */ + Resource getProfile(String profile) throws YarnException; + + /** + * Get all supported resource profiles. + * @return a map of resource objects associated with each profile + * + * @throws YARNFeatureNotEnabledException when feature is disabled + */ + Map getResourceProfiles() throws + YARNFeatureNotEnabledException; + + /** + * Reload profiles based on updated configuration. + * @throws IOException when invalid resource profile names are loaded + */ + void reloadProfiles() throws IOException; + + /** + * Get default supported resource profile. + * @return resource object which is default + * @throws YarnException when any invalid profile name or feature is disabled + */ + Resource getDefaultProfile() throws YarnException; + + /** + * Get minimum supported resource profile. + * @return resource object which is minimum + * @throws YarnException when any invalid profile name or feature is disabled + */ + Resource getMinimumProfile() throws YarnException; + + /** + * Get maximum supported resource profile. 
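A short usage sketch of this interface follows, mirroring the wiring that ResourceManager.serviceInit() performs later in this patch. Enabling the feature flag explicitly, the class name ProfilesManagerUsageSketch, and the println are illustrative additions, not part of the patch; running it requires the RM classpath and a resource-profiles source file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManager;
import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceProfilesManagerImpl;

// Sketch: how a caller might wire up and query the profiles manager,
// following the same init-then-lookup pattern used by the ResourceManager.
public final class ProfilesManagerUsageSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumption for this sketch: the feature is switched on explicitly.
    conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true);

    ResourceProfilesManager profilesManager = new ResourceProfilesManagerImpl();
    profilesManager.init(conf);

    Resource defaultProfile = profilesManager.getDefaultProfile();
    System.out.println("default profile -> " + defaultProfile);
  }
}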
+ * @return resource object which is maximum + * @throws YarnException when any invalid profile name or feature is disabled + */ + Resource getMaximumProfile() throws YarnException; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java new file mode 100644 index 00000000000..dfd14e0d2c6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java @@ -0,0 +1,257 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.resource; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YARNFeatureNotEnabledException; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.codehaus.jackson.map.ObjectMapper; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * PBImpl class to handle all proto related implementation for + * ResourceProfilesManager. 
+ */ +public class ResourceProfilesManagerImpl implements ResourceProfilesManager { + + private static final Log LOG = + LogFactory.getLog(ResourceProfilesManagerImpl.class); + + private final Map profiles = new ConcurrentHashMap<>(); + private List resourceTypeInfo = + new ArrayList(); + private Configuration conf; + private boolean profileEnabled = false; + + private static final String MEMORY = ResourceInformation.MEMORY_MB.getName(); + private static final String VCORES = ResourceInformation.VCORES.getName(); + + public static final String DEFAULT_PROFILE = "default"; + public static final String MINIMUM_PROFILE = "minimum"; + public static final String MAXIMUM_PROFILE = "maximum"; + + protected final ReentrantReadWriteLock.ReadLock readLock; + protected final ReentrantReadWriteLock.WriteLock writeLock; + + private static final String[] MANDATORY_PROFILES = {DEFAULT_PROFILE, + MINIMUM_PROFILE, MAXIMUM_PROFILE}; + private static final String FEATURE_NOT_ENABLED_MSG = + "Resource profile is not enabled, please " + + "enable resource profile feature before using its functions." + + " (by setting " + YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED + + " to true)"; + + public ResourceProfilesManagerImpl() { + ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + readLock = lock.readLock(); + writeLock = lock.writeLock(); + } + + public void init(Configuration config) throws IOException { + conf = config; + loadProfiles(); + + // Load resource types, this should be done even if resource profile is + // disabled, since we have mandatory resource types like vcores/memory. + loadResourceTypes(); + } + + private void loadResourceTypes() { + // Add all resource types + try { + writeLock.lock(); + Collection resourcesInfo = ResourceUtils + .getResourceTypes().values(); + for (ResourceInformation resourceInfo : resourcesInfo) { + resourceTypeInfo + .add(ResourceTypeInfo.newInstance(resourceInfo.getName(), + resourceInfo.getUnits(), resourceInfo.getResourceType())); + } + } finally { + writeLock.unlock(); + } + } + + private void loadProfiles() throws IOException { + profileEnabled = + conf.getBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, + YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_ENABLED); + if (!profileEnabled) { + return; + } + String sourceFile = + conf.get(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE, + YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_SOURCE_FILE); + String resourcesFile = sourceFile; + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + if (classLoader == null) { + classLoader = ResourceProfilesManagerImpl.class.getClassLoader(); + } + if (classLoader != null) { + URL tmp = classLoader.getResource(sourceFile); + if (tmp != null) { + resourcesFile = tmp.getPath(); + } + } + ObjectMapper mapper = new ObjectMapper(); + Map data = mapper.readValue(new File(resourcesFile), Map.class); + Iterator iterator = data.entrySet().iterator(); + while (iterator.hasNext()) { + Map.Entry entry = (Map.Entry) iterator.next(); + String profileName = entry.getKey().toString(); + if (profileName.isEmpty()) { + throw new IOException( + "Name of resource profile cannot be an empty string"); + } + if (entry.getValue() instanceof Map) { + Map profileInfo = (Map) entry.getValue(); + // ensure memory and vcores are specified + if (!profileInfo.containsKey(MEMORY) + || !profileInfo.containsKey(VCORES)) { + throw new IOException( + "Illegal resource profile definition; profile '" + profileName + + "' must contain '" + MEMORY + "' and '" + VCORES + "'"); + 
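To make the loader's expected input concrete, here is a hedged, standalone sketch of parsing a profiles map with the same Jackson API this class uses. The JSON content, the class name ProfilesFileSketch, and the resource values are invented examples; the mandatory-profile check simply mirrors the validation this loader performs.

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.codehaus.jackson.map.ObjectMapper;

// Sketch: the shape of data the loader expects -- a map of profile name to a
// map of resource name/value pairs -- parsed with Jackson as in the patch.
public final class ProfilesFileSketch {
  public static void main(String[] args) throws Exception {
    String json = "{"
        + "\"minimum\": {\"memory-mb\": 1024, \"vcores\": 1},"
        + "\"default\": {\"memory-mb\": 2048, \"vcores\": 2},"
        + "\"maximum\": {\"memory-mb\": 8192, \"vcores\": 4}"
        + "}";
    ObjectMapper mapper = new ObjectMapper();
    @SuppressWarnings("unchecked")
    Map<String, Object> data = mapper.readValue(json, Map.class);

    // Same invariant the loader enforces: the three mandatory profiles exist.
    List<String> mandatory = Arrays.asList("minimum", "default", "maximum");
    for (String name : mandatory) {
      if (!data.containsKey(name)) {
        throw new IllegalStateException("Mandatory profile '" + name + "' missing");
      }
    }
    System.out.println("Loaded profiles: " + data.keySet());
  }
}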
} + Resource resource = parseResource(profileInfo); + profiles.put(profileName, resource); + LOG.info( + "Added profile '" + profileName + "' with resources: " + resource); + } + } + // check to make sure mandatory profiles are present + for (String profile : MANDATORY_PROFILES) { + if (!profiles.containsKey(profile)) { + throw new IOException( + "Mandatory profile missing '" + profile + "' missing. " + + Arrays.toString(MANDATORY_PROFILES) + " must be present"); + } + } + LOG.info("Loaded profiles: " + profiles.keySet()); + } + + private Resource parseResource(Map profileInfo) throws IOException { + Resource resource = Resource.newInstance(0, 0); + Iterator iterator = profileInfo.entrySet().iterator(); + Map resourceTypes = ResourceUtils + .getResourceTypes(); + while (iterator.hasNext()) { + Map.Entry resourceEntry = (Map.Entry) iterator.next(); + String resourceName = resourceEntry.getKey().toString(); + ResourceInformation resourceValue = fromString(resourceName, + resourceEntry.getValue().toString()); + if (resourceName.equals(MEMORY)) { + resource.setMemorySize(resourceValue.getValue()); + continue; + } + if (resourceName.equals(VCORES)) { + resource + .setVirtualCores(Long.valueOf(resourceValue.getValue()).intValue()); + continue; + } + if (resourceTypes.containsKey(resourceName)) { + resource.setResourceInformation(resourceName, resourceValue); + } else { + throw new IOException("Unrecognized resource type '" + resourceName + + "'. Recognized resource types are '" + resourceTypes.keySet() + + "'"); + } + } + return resource; + } + + private void checkAndThrowExceptionWhenFeatureDisabled() + throws YARNFeatureNotEnabledException { + if (!profileEnabled) { + throw new YARNFeatureNotEnabledException(FEATURE_NOT_ENABLED_MSG); + } + } + + @Override + public Resource getProfile(String profile) throws YarnException{ + checkAndThrowExceptionWhenFeatureDisabled(); + + if (profile == null) { + throw new YarnException("Profile name cannot be null"); + } + + Resource profileRes = profiles.get(profile); + if (profileRes == null) { + throw new YarnException( + "Resource profile '" + profile + "' not found"); + } + return Resources.clone(profileRes); + } + + @Override + public Map getResourceProfiles() + throws YARNFeatureNotEnabledException { + checkAndThrowExceptionWhenFeatureDisabled(); + return Collections.unmodifiableMap(profiles); + } + + @Override + @VisibleForTesting + public void reloadProfiles() throws IOException { + profiles.clear(); + loadProfiles(); + } + + @Override + public Resource getDefaultProfile() throws YarnException { + return getProfile(DEFAULT_PROFILE); + } + + @Override + public Resource getMinimumProfile() throws YarnException { + return getProfile(MINIMUM_PROFILE); + } + + @Override + public Resource getMaximumProfile() throws YarnException { + return getProfile(MAXIMUM_PROFILE); + } + + private ResourceInformation fromString(String name, String value) { + String units = ResourceUtils.getUnits(value); + Long resourceValue = + Long.valueOf(value.substring(0, value.length() - units.length())); + return ResourceInformation.newInstance(name, units, resourceValue); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceWeights.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceWeights.java deleted file mode 100644 index 
b66a5d0d467..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceWeights.java +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.server.resourcemanager.resource; - -import org.apache.hadoop.classification.InterfaceAudience.Private; -import org.apache.hadoop.classification.InterfaceStability.Evolving; -import org.apache.hadoop.util.StringUtils; - -@Private -@Evolving -public class ResourceWeights { - public static final ResourceWeights NEUTRAL = new ResourceWeights(1.0f); - - private final float[] weights = new float[ResourceType.values().length]; - - public ResourceWeights(float memoryWeight, float cpuWeight) { - weights[ResourceType.MEMORY.ordinal()] = memoryWeight; - weights[ResourceType.CPU.ordinal()] = cpuWeight; - } - - public ResourceWeights(float weight) { - setWeight(weight); - } - - public ResourceWeights() { } - - public final void setWeight(float weight) { - for (int i = 0; i < weights.length; i++) { - weights[i] = weight; - } - } - - public void setWeight(ResourceType resourceType, float weight) { - weights[resourceType.ordinal()] = weight; - } - - public float getWeight(ResourceType resourceType) { - return weights[resourceType.ordinal()]; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("<"); - for (int i = 0; i < ResourceType.values().length; i++) { - if (i != 0) { - sb.append(", "); - } - ResourceType resourceType = ResourceType.values()[i]; - sb.append(StringUtils.toLowerCase(resourceType.name())); - sb.append(StringUtils.format(" weight=%.1f", getWeight(resourceType))); - } - sb.append(">"); - return sb.toString(); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 13b079286fc..98192caa28e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -748,14 +748,10 @@ public ApplicationReport createAndGetApplicationReport(String clientUserName, } RMAppMetrics rmAppMetrics = getRMAppMetrics(); - appUsageReport.setMemorySeconds(rmAppMetrics.getMemorySeconds()); - 
appUsageReport.setVcoreSeconds(rmAppMetrics.getVcoreSeconds()); - appUsageReport. - setPreemptedMemorySeconds(rmAppMetrics. - getPreemptedMemorySeconds()); - appUsageReport. - setPreemptedVcoreSeconds(rmAppMetrics. - getPreemptedVcoreSeconds()); + appUsageReport + .setResourceSecondsMap(rmAppMetrics.getResourceSecondsMap()); + appUsageReport.setPreemptedResourceSecondsMap( + rmAppMetrics.getPreemptedResourceSecondsMap()); } if (currentApplicationAttemptId == null) { @@ -1225,6 +1221,8 @@ public void transition(RMAppImpl app, RMAppEvent event) { long applicationLifetime = app.getApplicationLifetime(ApplicationTimeoutType.LIFETIME); + applicationLifetime = app.scheduler + .checkAndGetApplicationLifetime(app.queue, applicationLifetime); if (applicationLifetime > 0) { // calculate next timeout value Long newTimeout = @@ -1628,10 +1626,9 @@ public RMAppMetrics getRMAppMetrics() { Resource resourcePreempted = Resource.newInstance(0, 0); int numAMContainerPreempted = 0; int numNonAMContainerPreempted = 0; - long memorySeconds = 0; - long vcoreSeconds = 0; - long preemptedMemorySeconds = 0; - long preemptedVcoreSeconds = 0; + Map resourceSecondsMap = new HashMap<>(); + Map preemptedSecondsMap = new HashMap<>(); + for (RMAppAttempt attempt : attempts.values()) { if (null != attempt) { RMAppAttemptMetrics attemptMetrics = @@ -1645,17 +1642,25 @@ public RMAppMetrics getRMAppMetrics() { // for both running and finished containers. AggregateAppResourceUsage resUsage = attempt.getRMAppAttemptMetrics().getAggregateAppResourceUsage(); - memorySeconds += resUsage.getMemorySeconds(); - vcoreSeconds += resUsage.getVcoreSeconds(); - preemptedMemorySeconds += attemptMetrics.getPreemptedMemory(); - preemptedVcoreSeconds += attemptMetrics.getPreemptedVcore(); + for (Map.Entry entry : resUsage + .getResourceUsageSecondsMap().entrySet()) { + long value = RMServerUtils + .getOrDefault(resourceSecondsMap, entry.getKey(), 0L); + value += entry.getValue(); + resourceSecondsMap.put(entry.getKey(), value); + } + for (Map.Entry entry : attemptMetrics + .getPreemptedResourceSecondsMap().entrySet()) { + long value = RMServerUtils + .getOrDefault(preemptedSecondsMap, entry.getKey(), 0L); + value += entry.getValue(); + preemptedSecondsMap.put(entry.getKey(), value); + } } } - return new RMAppMetrics(resourcePreempted, - numNonAMContainerPreempted, numAMContainerPreempted, - memorySeconds, vcoreSeconds, - preemptedMemorySeconds, preemptedVcoreSeconds); + return new RMAppMetrics(resourcePreempted, numNonAMContainerPreempted, + numAMContainerPreempted, resourceSecondsMap, preemptedSecondsMap); } @Private diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java index fa068ea2d88..2bb7fd1ae10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java @@ -19,27 +19,27 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; 
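The per-resource "resource-seconds" accounting introduced in these hunks reduces to summing maps keyed by resource name (memory-mb, vcores, and any additional resource types). The standalone sketch below shows that merge with invented values and an invented class name; it parallels the per-attempt aggregation loop in RMAppImpl.getRMAppMetrics() without using the RM types.

import java.util.HashMap;
import java.util.Map;

// Sketch: merging per-attempt resource-seconds maps into an app-level total.
public final class ResourceSecondsAggregationSketch {
  static void addAll(Map<String, Long> total, Map<String, Long> attempt) {
    for (Map.Entry<String, Long> e : attempt.entrySet()) {
      // Sum values per resource name, creating the entry if it is absent.
      total.merge(e.getKey(), e.getValue(), Long::sum);
    }
  }

  public static void main(String[] args) {
    Map<String, Long> attempt1 = new HashMap<>();
    attempt1.put("memory-mb", 10240L);
    attempt1.put("vcores", 20L);
    Map<String, Long> attempt2 = new HashMap<>();
    attempt2.put("memory-mb", 5120L);
    attempt2.put("vcores", 10L);

    Map<String, Long> total = new HashMap<>();
    addAll(total, attempt1);
    addAll(total, attempt2);
    System.out.println(total); // memory-mb=15360, vcores=30 (order unspecified)
  }
}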
+import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; + +import java.util.Map; public class RMAppMetrics { final Resource resourcePreempted; final int numNonAMContainersPreempted; final int numAMContainersPreempted; - final long memorySeconds; - final long vcoreSeconds; - private final long preemptedMemorySeconds; - private final long preemptedVcoreSeconds; + private final Map resourceSecondsMap; + private final Map preemptedResourceSecondsMap; public RMAppMetrics(Resource resourcePreempted, int numNonAMContainersPreempted, int numAMContainersPreempted, - long memorySeconds, long vcoreSeconds, long preemptedMemorySeconds, - long preemptedVcoreSeconds) { + Map resourceSecondsMap, + Map preemptedResourceSecondsMap) { this.resourcePreempted = resourcePreempted; this.numNonAMContainersPreempted = numNonAMContainersPreempted; this.numAMContainersPreempted = numAMContainersPreempted; - this.memorySeconds = memorySeconds; - this.vcoreSeconds = vcoreSeconds; - this.preemptedMemorySeconds = preemptedMemorySeconds; - this.preemptedVcoreSeconds = preemptedVcoreSeconds; + this.resourceSecondsMap = resourceSecondsMap; + this.preemptedResourceSecondsMap = preemptedResourceSecondsMap; } public Resource getResourcePreempted() { @@ -55,19 +55,32 @@ public int getNumAMContainersPreempted() { } public long getMemorySeconds() { - return memorySeconds; + return RMServerUtils.getOrDefault(resourceSecondsMap, + ResourceInformation.MEMORY_MB.getName(), 0L); } public long getVcoreSeconds() { - return vcoreSeconds; + return RMServerUtils + .getOrDefault(resourceSecondsMap, ResourceInformation.VCORES.getName(), + 0L); } public long getPreemptedMemorySeconds() { - return preemptedMemorySeconds; + return RMServerUtils.getOrDefault(preemptedResourceSecondsMap, + ResourceInformation.MEMORY_MB.getName(), 0L); } public long getPreemptedVcoreSeconds() { - return preemptedVcoreSeconds; + return RMServerUtils.getOrDefault(preemptedResourceSecondsMap, + ResourceInformation.VCORES.getName(), 0L); + } + + public Map getResourceSecondsMap() { + return resourceSecondsMap; + } + + public Map getPreemptedResourceSecondsMap() { + return preemptedResourceSecondsMap; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java index f0c2b348c32..b858712f7d6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java @@ -19,42 +19,38 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; + +import java.util.HashMap; +import java.util.Map; @Private public class AggregateAppResourceUsage { - long memorySeconds; - long vcoreSeconds; + private Map resourceSecondsMap = new HashMap<>(); - public AggregateAppResourceUsage(long memorySeconds, long 
vcoreSeconds) { - this.memorySeconds = memorySeconds; - this.vcoreSeconds = vcoreSeconds; + public AggregateAppResourceUsage(Map resourceSecondsMap) { + this.resourceSecondsMap.putAll(resourceSecondsMap); } /** * @return the memorySeconds */ public long getMemorySeconds() { - return memorySeconds; - } - - /** - * @param memorySeconds the memorySeconds to set - */ - public void setMemorySeconds(long memorySeconds) { - this.memorySeconds = memorySeconds; + return RMServerUtils.getOrDefault(resourceSecondsMap, + ResourceInformation.MEMORY_MB.getName(), 0L); } /** * @return the vcoreSeconds */ public long getVcoreSeconds() { - return vcoreSeconds; + return RMServerUtils + .getOrDefault(resourceSecondsMap, ResourceInformation.VCORES.getName(), + 0L); } - /** - * @param vcoreSeconds the vcoreSeconds to set - */ - public void setVcoreSeconds(long vcoreSeconds) { - this.vcoreSeconds = vcoreSeconds; + public Map getResourceUsageSecondsMap() { + return resourceSecondsMap; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 65412df808c..fa29e008cb3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -953,12 +953,9 @@ public ApplicationResourceUsageReport getApplicationResourceUsageReport() { } AggregateAppResourceUsage resUsage = this.attemptMetrics.getAggregateAppResourceUsage(); - report.setMemorySeconds(resUsage.getMemorySeconds()); - report.setVcoreSeconds(resUsage.getVcoreSeconds()); - report.setPreemptedMemorySeconds( - this.attemptMetrics.getPreemptedMemory()); - report.setPreemptedVcoreSeconds( - this.attemptMetrics.getPreemptedVcore()); + report.setResourceSecondsMap(resUsage.getResourceUsageSecondsMap()); + report.setPreemptedResourceSecondsMap( + this.attemptMetrics.getPreemptedResourceSecondsMap()); return report; } finally { this.readLock.unlock(); @@ -995,11 +992,10 @@ public void recover(RMState state) { this.finalStatus = attemptState.getFinalApplicationStatus(); this.startTime = attemptState.getStartTime(); this.finishTime = attemptState.getFinishTime(); - this.attemptMetrics.updateAggregateAppResourceUsage( - attemptState.getMemorySeconds(), attemptState.getVcoreSeconds()); + this.attemptMetrics + .updateAggregateAppResourceUsage(attemptState.getResourceSecondsMap()); this.attemptMetrics.updateAggregatePreemptedAppResourceUsage( - attemptState.getPreemptedMemorySeconds(), - attemptState.getPreemptedVcoreSeconds()); + attemptState.getPreemptedResourceSecondsMap()); } public void transferStateFromAttempt(RMAppAttempt attempt) { @@ -1375,16 +1371,12 @@ private void rememberTargetTransitionsAndStoreState(RMAppAttemptEvent event, RMStateStore rmStore = rmContext.getStateStore(); setFinishTime(System.currentTimeMillis()); - ApplicationAttemptStateData attemptState = - ApplicationAttemptStateData.newInstance( - applicationAttemptId, getMasterContainer(), - rmStore.getCredentialsFromAppAttempt(this), - startTime, stateToBeStored, 
finalTrackingUrl, diags.toString(), - finalStatus, exitStatus, - getFinishTime(), resUsage.getMemorySeconds(), - resUsage.getVcoreSeconds(), - this.attemptMetrics.getPreemptedMemory(), - this.attemptMetrics.getPreemptedVcore()); + ApplicationAttemptStateData attemptState = ApplicationAttemptStateData + .newInstance(applicationAttemptId, getMasterContainer(), + rmStore.getCredentialsFromAppAttempt(this), startTime, + stateToBeStored, finalTrackingUrl, diags.toString(), finalStatus, exitStatus, + getFinishTime(), resUsage.getResourceUsageSecondsMap(), + this.attemptMetrics.getPreemptedResourceSecondsMap()); LOG.info("Updating application attempt " + applicationAttemptId + " with final state: " + targetedFinalState + ", and exit status: " + exitStatus); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java index 0655609a893..0982ef93434 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java @@ -18,6 +18,8 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -25,11 +27,13 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; +import org.apache.commons.lang.time.DateUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; @@ -49,10 +53,8 @@ public class RMAppAttemptMetrics { private ReadLock readLock; private WriteLock writeLock; - private AtomicLong finishedMemorySeconds = new AtomicLong(0); - private AtomicLong finishedVcoreSeconds = new AtomicLong(0); - private AtomicLong preemptedMemorySeconds = new AtomicLong(0); - private AtomicLong preemptedVcoreSeconds = new AtomicLong(0); + private Map resourceUsageMap = new HashMap<>(); + private Map preemptedResourceMap = new HashMap<>(); private RMContext rmContext; private int[][] localityStatistics = @@ -102,11 +104,16 @@ public Resource getResourcePreempted() { } public long getPreemptedMemory() { - return preemptedMemorySeconds.get(); + return preemptedResourceMap.get(ResourceInformation.MEMORY_MB.getName()) + .get(); } public long getPreemptedVcore() { - return preemptedVcoreSeconds.get(); + return preemptedResourceMap.get(ResourceInformation.VCORES.getName()).get(); + } + + public Map 
getPreemptedResourceSecondsMap() { + return convertAtomicLongMaptoLongMap(preemptedResourceMap); } public int getNumNonAMContainersPreempted() { @@ -122,35 +129,89 @@ public boolean getIsPreempted() { } public AggregateAppResourceUsage getAggregateAppResourceUsage() { - long memorySeconds = finishedMemorySeconds.get(); - long vcoreSeconds = finishedVcoreSeconds.get(); + Map resourcesUsed = + convertAtomicLongMaptoLongMap(resourceUsageMap); // Only add in the running containers if this is the active attempt. RMApp rmApp = rmContext.getRMApps().get(attemptId.getApplicationId()); - if (null != rmApp) { - RMAppAttempt currentAttempt = rmApp.getCurrentAppAttempt(); + if (rmApp != null) { + RMAppAttempt currentAttempt = rmContext.getRMApps().get(attemptId.getApplicationId()).getCurrentAppAttempt(); if (currentAttempt.getAppAttemptId().equals(attemptId)) { - ApplicationResourceUsageReport appResUsageReport = rmContext - .getScheduler().getAppResourceUsageReport(attemptId); + ApplicationResourceUsageReport appResUsageReport = + rmContext.getScheduler().getAppResourceUsageReport(attemptId); if (appResUsageReport != null) { - memorySeconds += appResUsageReport.getMemorySeconds(); - vcoreSeconds += appResUsageReport.getVcoreSeconds(); + Map tmp = appResUsageReport.getResourceSecondsMap(); + for (Map.Entry entry : tmp.entrySet()) { + if (resourcesUsed.containsKey(entry.getKey())) { + Long value = resourcesUsed.get(entry.getKey()); + value += entry.getValue(); + resourcesUsed.put(entry.getKey(), value); + } else{ + resourcesUsed.put(entry.getKey(), entry.getValue()); + } + } } } } - return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds); + return new AggregateAppResourceUsage(resourcesUsed); } - public void updateAggregateAppResourceUsage(long finishedMemorySeconds, - long finishedVcoreSeconds) { - this.finishedMemorySeconds.addAndGet(finishedMemorySeconds); - this.finishedVcoreSeconds.addAndGet(finishedVcoreSeconds); + public void updateAggregateAppResourceUsage(Resource allocated, + long deltaUsedMillis) { + updateUsageMap(allocated, deltaUsedMillis, resourceUsageMap); + } + + public void updateAggregatePreemptedAppResourceUsage(Resource allocated, + long deltaUsedMillis) { + updateUsageMap(allocated, deltaUsedMillis, preemptedResourceMap); + } + + public void updateAggregateAppResourceUsage( + Map resourceSecondsMap) { + updateUsageMap(resourceSecondsMap, resourceUsageMap); } public void updateAggregatePreemptedAppResourceUsage( - long preemptedMemorySeconds, long preemptedVcoreSeconds) { - this.preemptedMemorySeconds.addAndGet(preemptedMemorySeconds); - this.preemptedVcoreSeconds.addAndGet(preemptedVcoreSeconds); + Map preemptedResourceSecondsMap) { + updateUsageMap(preemptedResourceSecondsMap, preemptedResourceMap); + } + + private void updateUsageMap(Resource allocated, long deltaUsedMillis, + Map targetMap) { + for (ResourceInformation entry : allocated.getResources()) { + AtomicLong resourceUsed; + if (!targetMap.containsKey(entry.getName())) { + resourceUsed = new AtomicLong(0); + targetMap.put(entry.getName(), resourceUsed); + + } + resourceUsed = targetMap.get(entry.getName()); + resourceUsed.addAndGet((entry.getValue() * deltaUsedMillis) + / DateUtils.MILLIS_PER_SECOND); + } + } + + private void updateUsageMap(Map sourceMap, + Map targetMap) { + for (Map.Entry entry : sourceMap.entrySet()) { + AtomicLong resourceUsed; + if (!targetMap.containsKey(entry.getKey())) { + resourceUsed = new AtomicLong(0); + targetMap.put(entry.getKey(), resourceUsed); + + } + resourceUsed = 
targetMap.get(entry.getKey()); + resourceUsed.set(entry.getValue()); + } + } + + private Map convertAtomicLongMaptoLongMap( + Map source) { + Map ret = new HashMap<>(); + for (Map.Entry entry : source.entrySet()) { + ret.put(entry.getKey(), entry.getValue().get()); + } + return ret; } public void incNumAllocatedContainers(NodeType containerType, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index f49db7e761b..8c165ded8b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -25,7 +25,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; -import org.apache.commons.lang.time.DateUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -35,6 +34,7 @@ import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.ContainerUpdateType; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; @@ -641,7 +641,8 @@ public void transition(RMContainerImpl container, RMContainerEvent event) { new AllocationExpirationInfo(event.getContainerId())); container.eventHandler.handle(new RMNodeUpdateContainerEvent( container.nodeId, - Collections.singletonList(container.getContainer()))); + Collections.singletonMap(container.getContainer(), + ContainerUpdateType.DECREASE_RESOURCE))); } else if (Resources.fitsIn(nmContainerResource, rmContainerResource)) { // If nmContainerResource < rmContainerResource, this is caused by the // following sequence: @@ -713,20 +714,15 @@ private static void updateAttemptMetrics(RMContainerImpl container) { if (rmAttempt != null) { long usedMillis = container.finishTime - container.creationTime; - long memorySeconds = resource.getMemorySize() - * usedMillis / DateUtils.MILLIS_PER_SECOND; - long vcoreSeconds = resource.getVirtualCores() - * usedMillis / DateUtils.MILLIS_PER_SECOND; rmAttempt.getRMAppAttemptMetrics() - .updateAggregateAppResourceUsage(memorySeconds,vcoreSeconds); + .updateAggregateAppResourceUsage(resource, usedMillis); // If this is a preempted container, update preemption metrics if (ContainerExitStatus.PREEMPTED == container.finishedStatus - .getExitStatus()) { - rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource, - container); + .getExitStatus()) { rmAttempt.getRMAppAttemptMetrics() - .updateAggregatePreemptedAppResourceUsage(memorySeconds, - vcoreSeconds); + .updatePreemptionInfo(resource, container); + rmAttempt.getRMAppAttemptMetrics() + .updateAggregatePreemptedAppResourceUsage(resource, usedMillis); } } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 1bdaa98b16e..c547128f182 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.ContainerUpdateType; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; @@ -173,7 +174,11 @@ public class RMNodeImpl implements RMNode, EventHandler { private final Map toBeUpdatedContainers = new HashMap<>(); - + + // NOTE: This is required for backward compatibility. + private final Map toBeDecreasedContainers = + new HashMap<>(); + private final Map nmReportedIncreasedContainers = new HashMap<>(); @@ -626,6 +631,10 @@ public void updateNodeHeartbeatResponseForUpdatedContainers( try { response.addAllContainersToUpdate(toBeUpdatedContainers.values()); toBeUpdatedContainers.clear(); + + // NOTE: This is required for backward compatibility. + response.addAllContainersToDecrease(toBeDecreasedContainers.values()); + toBeDecreasedContainers.clear(); } finally { this.writeLock.unlock(); } @@ -847,7 +856,8 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { containers = startEvent.getNMContainerStatuses(); if (containers != null && !containers.isEmpty()) { for (NMContainerStatus container : containers) { - if (container.getContainerState() == ContainerState.RUNNING) { + if (container.getContainerState() == ContainerState.RUNNING || + container.getContainerState() == ContainerState.SCHEDULED) { rmNode.launchedContainers.add(container.getContainerId()); } } @@ -1042,8 +1052,13 @@ public static class UpdateContainersTransition public void transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeUpdateContainerEvent de = (RMNodeUpdateContainerEvent) event; - for (Container c : de.getToBeUpdatedContainers()) { - rmNode.toBeUpdatedContainers.put(c.getId(), c); + for (Map.Entry e : + de.getToBeUpdatedContainers().entrySet()) { + // NOTE: This is required for backward compatibility. 
+ if (ContainerUpdateType.DECREASE_RESOURCE == e.getValue()) { + rmNode.toBeDecreasedContainers.put(e.getKey().getId(), e.getKey()); + } + rmNode.toBeUpdatedContainers.put(e.getKey().getId(), e.getKey()); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeUpdateContainerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeUpdateContainerEvent.java index 73af563dba0..b8f8e734f9b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeUpdateContainerEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeUpdateContainerEvent.java @@ -19,8 +19,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmnode; import java.util.List; +import java.util.Map; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerUpdateType; import org.apache.hadoop.yarn.api.records.NodeId; /** @@ -29,16 +31,15 @@ * */ public class RMNodeUpdateContainerEvent extends RMNodeEvent { - private List toBeUpdatedContainers; + private Map toBeUpdatedContainers; public RMNodeUpdateContainerEvent(NodeId nodeId, - List toBeUpdatedContainers) { + Map toBeUpdatedContainers) { super(nodeId, RMNodeEventType.UPDATE_CONTAINER); - this.toBeUpdatedContainers = toBeUpdatedContainers; } - public List getToBeUpdatedContainers() { + public Map getToBeUpdatedContainers() { return toBeUpdatedContainers; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 2c270174845..4896ab0e76a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent; @@ -65,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.RMCriticalThreadUncaughtExceptionHandler; import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; @@ -95,6 +97,7 @@ import org.apache.hadoop.yarn.server.utils.Lock; import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.SystemClock; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.resource.Resources; import com.google.common.annotations.VisibleForTesting; @@ -123,8 +126,20 @@ public abstract class AbstractYarnScheduler protected SchedulerHealth schedulerHealth = new SchedulerHealth(); protected volatile long lastNodeUpdateTime; + // timeout to join when we stop this service + protected final long THREAD_JOIN_TIMEOUT_MS = 1000; + private volatile Clock clock; + /** + * To enable the update thread, subclasses should set updateInterval to a + * positive value during {@link #serviceInit(Configuration)}. + */ + protected long updateInterval = -1L; + @VisibleForTesting + Thread updateThread; + private final Object updateThreadMonitor = new Object(); + /* * All schedulers which are inheriting AbstractYarnScheduler should use * concurrent version of 'applications' map. @@ -185,9 +200,35 @@ public void serviceInit(Configuration conf) throws Exception { autoUpdateContainers = conf.getBoolean(YarnConfiguration.RM_AUTO_UPDATE_CONTAINERS, YarnConfiguration.DEFAULT_RM_AUTO_UPDATE_CONTAINERS); + + if (updateInterval > 0) { + updateThread = new UpdateThread(); + updateThread.setName("SchedulerUpdateThread"); + updateThread.setUncaughtExceptionHandler( + new RMCriticalThreadUncaughtExceptionHandler(rmContext)); + updateThread.setDaemon(true); + } + super.serviceInit(conf); } + @Override + protected void serviceStart() throws Exception { + if (updateThread != null) { + updateThread.start(); + } + super.serviceStart(); + } + + @Override + protected void serviceStop() throws Exception { + if (updateThread != null) { + updateThread.interrupt(); + updateThread.join(THREAD_JOIN_TIMEOUT_MS); + } + super.serviceStop(); + } + @VisibleForTesting public ClusterNodeTracker getNodeTracker() { return nodeTracker; @@ -529,6 +570,7 @@ private RMContainer recoverAndCreateContainer(NMContainerStatus status, node.getHttpAddress(), status.getAllocatedResource(), status.getPriority(), null); container.setVersion(status.getVersion()); + container.setExecutionType(status.getExecutionType()); ApplicationAttemptId attemptId = container.getId().getApplicationAttemptId(); RMContainer rmContainer = new RMContainerImpl(container, @@ -1280,7 +1322,121 @@ public List getNodeIds(String resourceName) { * @param container Container. */ public void asyncContainerRelease(RMContainer container) { - this.rmContext.getDispatcher().getEventHandler() - .handle(new ReleaseContainerEvent(container)); + this.rmContext.getDispatcher().getEventHandler().handle( + new ReleaseContainerEvent(container)); + } + + /* + * Get a Resource object with for the minimum allocation possible. If resource + * profiles are enabled then the 'minimum' resource profile will be used. If + * they are not enabled, use the minimums specified in the config files. 
+ * + * @return a Resource object with the minimum allocation for the scheduler + */ + public Resource getMinimumAllocation() { + boolean profilesEnabled = getConfig() + .getBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, + YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_ENABLED); + Resource ret; + if (!profilesEnabled) { + ret = ResourceUtils.getResourceTypesMinimumAllocation(); + } else { + try { + ret = rmContext.getResourceProfilesManager().getMinimumProfile(); + } catch (YarnException e) { + LOG.error( + "Exception while getting minimum profile from profile manager:", e); + throw new YarnRuntimeException(e); + } + } + LOG.info("Minimum allocation = " + ret); + return ret; + } + + /** + * Get a Resource object with for the maximum allocation possible. If resource + * profiles are enabled then the 'maximum' resource profile will be used. If + * they are not enabled, use the maximums specified in the config files. + * + * @return a Resource object with the maximum allocation for the scheduler + */ + + public Resource getMaximumAllocation() { + boolean profilesEnabled = getConfig() + .getBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, + YarnConfiguration.DEFAULT_RM_RESOURCE_PROFILES_ENABLED); + Resource ret; + if (!profilesEnabled) { + ret = ResourceUtils.getResourceTypesMaximumAllocation(); + } else { + try { + ret = rmContext.getResourceProfilesManager().getMaximumProfile(); + } catch (YarnException e) { + LOG.error( + "Exception while getting maximum profile from ResourceProfileManager:", + e); + throw new YarnRuntimeException(e); + } + } + LOG.info("Maximum allocation = " + ret); + return ret; + } + + @Override + public long checkAndGetApplicationLifetime(String queueName, long lifetime) { + // -1 indicates, lifetime is not configured. + return -1; + } + + @Override + public long getMaximumApplicationLifetime(String queueName) { + return -1; + } + + /** + * Update internal state of the scheduler. This can be useful for scheduler + * implementations that maintain some state that needs to be periodically + * updated; for example, metrics or queue resources. It will be called by the + * {@link UpdateThread} every {@link #updateInterval}. By default, it will + * not run; subclasses should set {@link #updateInterval} to a + * positive value during {@link #serviceInit(Configuration)} if they want to + * enable the thread. + */ + @VisibleForTesting + public void update() { + // do nothing by default + } + + /** + * Thread which calls {@link #update()} every + * updateInterval milliseconds. + */ + private class UpdateThread extends Thread { + @Override + public void run() { + while (!Thread.currentThread().isInterrupted()) { + try { + synchronized (updateThreadMonitor) { + updateThreadMonitor.wait(updateInterval); + } + update(); + } catch (InterruptedException ie) { + LOG.warn("Scheduler UpdateThread interrupted. Exiting."); + return; + } catch (Exception e) { + LOG.error("Exception in scheduler UpdateThread", e); + } + } + } + } + + /** + * Allows {@link UpdateThread} to start processing without waiting till + * {@link #updateInterval}. 
+ */ + protected void triggerUpdate() { + synchronized (updateThreadMonitor) { + updateThreadMonitor.notify(); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index 8acf7d55dbb..082ec14d433 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; @@ -525,6 +526,9 @@ public void transferStateFromPreviousAppSchedulingInfo( } public void recoverContainer(RMContainer rmContainer, String partition) { + if (rmContainer.getExecutionType() != ExecutionType.GUARANTEED) { + return; + } try { this.writeLock.lock(); QueueMetrics metrics = queue.getMetrics(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java index 010e64506b6..ccec6bc6a89 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java @@ -219,10 +219,15 @@ public Resource getMaxAllowedAllocation() { return configuredMaxAllocation; } - return Resources.createResource( - Math.min(configuredMaxAllocation.getMemorySize(), maxNodeMemory), - Math.min(configuredMaxAllocation.getVirtualCores(), maxNodeVCores) - ); + Resource ret = Resources.clone(configuredMaxAllocation); + if (ret.getMemorySize() > maxNodeMemory) { + ret.setMemorySize(maxNodeMemory); + } + if (ret.getVirtualCores() > maxNodeVCores) { + ret.setVirtualCores(maxNodeVCores); + } + + return ret; } finally { readLock.unlock(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index f9a72191a67..db63cd868d0 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -55,11 +55,13 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.UpdateContainerError; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.server.api.ContainerType; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; @@ -107,9 +109,7 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { private static final long MEM_AGGREGATE_ALLOCATION_CACHE_MSECS = 3000; protected long lastMemoryAggregateAllocationUpdateTime = 0; - private long lastMemorySeconds = 0; - private long lastVcoreSeconds = 0; - + private Map lastResourceSecondsMap = new HashMap<>(); protected final AppSchedulingInfo appSchedulingInfo; protected ApplicationAttemptId attemptId; protected Map liveContainers = @@ -690,7 +690,8 @@ private Container updateContainerAndNMToken(RMContainer rmContainer, if (autoUpdate) { this.rmContext.getDispatcher().getEventHandler().handle( new RMNodeUpdateContainerEvent(rmContainer.getNodeId(), - Collections.singletonList(rmContainer.getContainer()))); + Collections.singletonMap( + rmContainer.getContainer(), updateType))); } else { rmContainer.handle(new RMContainerUpdatesAcquiredEvent( rmContainer.getContainerId(), @@ -1001,22 +1002,23 @@ private AggregateAppResourceUsage getRunningAggregateAppResourceUsage() { // recently. 
if ((currentTimeMillis - lastMemoryAggregateAllocationUpdateTime) > MEM_AGGREGATE_ALLOCATION_CACHE_MSECS) { - long memorySeconds = 0; - long vcoreSeconds = 0; + Map resourceSecondsMap = new HashMap<>(); for (RMContainer rmContainer : this.liveContainers.values()) { long usedMillis = currentTimeMillis - rmContainer.getCreationTime(); Resource resource = rmContainer.getContainer().getResource(); - memorySeconds += resource.getMemorySize() * usedMillis / - DateUtils.MILLIS_PER_SECOND; - vcoreSeconds += resource.getVirtualCores() * usedMillis - / DateUtils.MILLIS_PER_SECOND; + for (ResourceInformation entry : resource.getResources()) { + long value = RMServerUtils + .getOrDefault(resourceSecondsMap, entry.getName(), 0L); + value += entry.getValue() * usedMillis + / DateUtils.MILLIS_PER_SECOND; + resourceSecondsMap.put(entry.getName(), value); + } } lastMemoryAggregateAllocationUpdateTime = currentTimeMillis; - lastMemorySeconds = memorySeconds; - lastVcoreSeconds = vcoreSeconds; + lastResourceSecondsMap = resourceSecondsMap; } - return new AggregateAppResourceUsage(lastMemorySeconds, lastVcoreSeconds); + return new AggregateAppResourceUsage(lastResourceSecondsMap); } public ApplicationResourceUsageReport getResourceUsageReport() { @@ -1031,6 +1033,11 @@ public ApplicationResourceUsageReport getResourceUsageReport() { Resource cluster = rmContext.getScheduler().getClusterResource(); ResourceCalculator calc = rmContext.getScheduler().getResourceCalculator(); + Map preemptedResourceSecondsMaps = new HashMap<>(); + preemptedResourceSecondsMaps + .put(ResourceInformation.MEMORY_MB.getName(), 0L); + preemptedResourceSecondsMaps + .put(ResourceInformation.VCORES.getName(), 0L); float queueUsagePerc = 0.0f; float clusterUsagePerc = 0.0f; if (!calc.isInvalidDivisor(cluster)) { @@ -1040,15 +1047,15 @@ public ApplicationResourceUsageReport getResourceUsageReport() { queueUsagePerc = calc.divide(cluster, usedResourceClone, Resources.multiply(cluster, queueCapacityPerc)) * 100; } - clusterUsagePerc = calc.divide(cluster, usedResourceClone, cluster) - * 100; + clusterUsagePerc = + calc.divide(cluster, usedResourceClone, cluster) * 100; } - return ApplicationResourceUsageReport.newInstance(liveContainers.size(), - reservedContainers.size(), usedResourceClone, reservedResourceClone, - Resources.add(usedResourceClone, reservedResourceClone), - runningResourceUsage.getMemorySeconds(), - runningResourceUsage.getVcoreSeconds(), queueUsagePerc, - clusterUsagePerc, 0, 0); + return ApplicationResourceUsageReport + .newInstance(liveContainers.size(), reservedContainers.size(), + usedResourceClone, reservedResourceClone, + Resources.add(usedResourceClone, reservedResourceClone), + runningResourceUsage.getResourceUsageSecondsMap(), queueUsagePerc, + clusterUsagePerc, preemptedResourceSecondsMaps); } finally { writeLock.unlock(); } @@ -1131,9 +1138,11 @@ public void recoverContainer(SchedulerNode node, } LOG.info("SchedulerAttempt " + getApplicationAttemptId() + " is recovering container " + rmContainer.getContainerId()); - liveContainers.put(rmContainer.getContainerId(), rmContainer); - attemptResourceUsage.incUsed(node.getPartition(), - rmContainer.getContainer().getResource()); + addRMContainer(rmContainer.getContainerId(), rmContainer); + if (rmContainer.getExecutionType() == ExecutionType.GUARANTEED) { + attemptResourceUsage.incUsed(node.getPartition(), + rmContainer.getContainer().getResource()); + } // resourceLimit: updated when LeafQueue#recoverContainer#allocateResource // is called. 
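
The hunks above (RMAppMetrics, AggregateAppResourceUsage, RMAppAttemptMetrics, SchedulerApplicationAttempt) all apply the same transformation: the scalar memorySeconds/vcoreSeconds fields, and their preempted counterparts, are replaced by a Map of resource-seconds keyed by resource name, read back through ResourceInformation.MEMORY_MB / VCORES and an RMServerUtils getOrDefault-style helper. The standalone sketch below is not part of the patch; it only illustrates that accumulation pattern. The class name, the literal "memory-mb"/"vcores" keys, and the get() helper are illustrative stand-ins for the real ResourceInformation names and RMServerUtils helper.

import java.util.HashMap;
import java.util.Map;

/**
 * Minimal sketch of the per-resource "resource-seconds" accounting used in
 * this patch: one map keyed by resource name instead of dedicated
 * memorySeconds/vcoreSeconds fields. Names here are illustrative only.
 */
public class ResourceSecondsSketch {

  // Illustrative keys; the patch uses ResourceInformation.MEMORY_MB.getName() etc.
  static final String MEMORY = "memory-mb";
  static final String VCORES = "vcores";

  private final Map<String, Long> resourceSecondsMap = new HashMap<>();

  /** Accumulate one container's usage: resource value * seconds, per type. */
  public void add(Map<String, Long> allocated, long usedMillis) {
    for (Map.Entry<String, Long> e : allocated.entrySet()) {
      long seconds = e.getValue() * usedMillis / 1000L;
      resourceSecondsMap.merge(e.getKey(), seconds, Long::sum);
    }
  }

  /** Stand-in for the getOrDefault helper the patch relies on. */
  public long get(String resourceName) {
    Long v = resourceSecondsMap.get(resourceName);
    return v == null ? 0L : v;
  }

  // Backward-compatible scalar views, as kept by RMAppMetrics and friends.
  public long getMemorySeconds() { return get(MEMORY); }
  public long getVcoreSeconds()  { return get(VCORES); }

  public static void main(String[] args) {
    ResourceSecondsSketch usage = new ResourceSecondsSketch();
    Map<String, Long> container = new HashMap<>();
    container.put(MEMORY, 2048L); // 2048 MB
    container.put(VCORES, 2L);
    usage.add(container, 10_000L); // container ran for 10 seconds
    System.out.println(usage.getMemorySeconds() + " MB-seconds, "
        + usage.getVcoreSeconds() + " vcore-seconds");
  }
}

Keeping the map keyed by resource name is what lets the same accumulation code cover additional resource types registered through resource profiles, while getMemorySeconds()/getVcoreSeconds() remain as compatibility views over the map, as the patched getters above do.
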
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java index 272537c8bf6..90fa3e4ebc0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java @@ -170,14 +170,6 @@ protected synchronized void allocateContainer(RMContainer rmContainer, launchedContainers.put(container.getId(), new ContainerInfo(rmContainer, launchedOnNode)); - - if (LOG.isDebugEnabled()) { - LOG.debug("Assigned container " + container.getId() + " of capacity " - + container.getResource() + " on host " + rmNode.getNodeAddress() - + ", which has " + numContainers + " containers, " - + getAllocatedResource() + " used and " + getUnallocatedResource() - + " available after allocation"); - } } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java index 7b554db4705..c558b8dd912 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java @@ -39,10 +39,12 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException; import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.AccessType; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; @@ -253,6 +255,14 @@ public static void normalizeAndvalidateRequest(ResourceRequest resReq, private static void validateResourceRequest(ResourceRequest resReq, Resource maximumResource, QueueInfo queueInfo, RMContext rmContext) throws InvalidResourceRequestException { + try { + RMServerUtils.convertProfileToResourceCapability(resReq, + rmContext.getYarnConfiguration(), + rmContext.getResourceProfilesManager()); + } catch (YarnException ye) { + throw new InvalidResourceRequestException(ye); + } + if (resReq.getCapability().getMemorySize() < 0 || resReq.getCapability().getMemorySize() > maximumResource.getMemorySize()) { throw new 
InvalidResourceRequestException("Invalid resource request" diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java index 08e0603a843..111998bae6f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java @@ -385,4 +385,24 @@ List getPendingResourceRequestsForAttempt( * @return the normalized resource */ Resource getNormalizedResource(Resource requestedResource); + + /** + * Verify whether a submitted application lifetime is valid as per configured + * Queue lifetime. + * @param queueName Name of the Queue + * @param lifetime configured application lifetime + * @return valid lifetime as per queue + */ + @Public + @Evolving + long checkAndGetApplicationLifetime(String queueName, long lifetime); + + /** + * Get maximum lifetime for a queue. + * @param queueName to get lifetime + * @return maximum lifetime in seconds + */ + @Public + @Evolving + long getMaximumApplicationLifetime(String queueName); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index d7c452a1ffc..250f4e6b9a7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java @@ -909,7 +909,7 @@ public boolean accept(Resource cluster, maxResourceLimit = labelManager.getResourceByLabel( schedulerContainer.getNodePartition(), cluster); } - if (!Resources.fitsIn(resourceCalculator, cluster, + if (!Resources.fitsIn(resourceCalculator, Resources.add(queueUsage.getUsed(partition), netAllocated), maxResourceLimit)) { if (LOG.isDebugEnabled()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index fde84c48def..7f50272240b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -291,8 +291,8 @@ void 
initScheduler(Configuration configuration) throws writeLock.lock(); this.conf = loadCapacitySchedulerConfiguration(configuration); validateConf(this.conf); - this.minimumAllocation = this.conf.getMinimumAllocation(); - initMaximumResourceCapability(this.conf.getMaximumAllocation()); + this.minimumAllocation = super.getMinimumAllocation(); + initMaximumResourceCapability(super.getMaximumAllocation()); this.calculator = this.conf.getResourceCalculator(); this.usePortForNodeName = this.conf.getUsePortForNodeName(); this.applications = new ConcurrentHashMap<>(); @@ -2564,4 +2564,47 @@ public boolean moveReservedContainer(RMContainer toBeMovedContainer, writeLock.unlock(); } } + + @Override + public long checkAndGetApplicationLifetime(String queueName, + long lifetimeRequestedByApp) { + try { + readLock.lock(); + CSQueue queue = getQueue(queueName); + if (queue == null || !(queue instanceof LeafQueue)) { + return lifetimeRequestedByApp; + } + + long defaultApplicationLifetime = + ((LeafQueue) queue).getDefaultApplicationLifetime(); + long maximumApplicationLifetime = + ((LeafQueue) queue).getMaximumApplicationLifetime(); + + // check only for maximum, that's enough because default cann't + // exceed maximum + if (maximumApplicationLifetime <= 0) { + return lifetimeRequestedByApp; + } + + if (lifetimeRequestedByApp <= 0) { + return defaultApplicationLifetime; + } else if (lifetimeRequestedByApp > maximumApplicationLifetime) { + return maximumApplicationLifetime; + } + return lifetimeRequestedByApp; + } finally { + readLock.unlock(); + } + } + + @Override + public long getMaximumApplicationLifetime(String queueName) { + CSQueue queue = getQueue(queueName); + if (queue == null || !(queue instanceof LeafQueue)) { + LOG.error("Unknown queue: " + queueName); + return -1; + } + // In seconds + return ((LeafQueue) queue).getMaximumApplicationLifetime(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index 13b9ff69f2a..3a519ecf5f1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -1496,4 +1496,30 @@ public boolean getAssignMultipleEnabled() { public int getMaxAssignPerHeartbeat() { return getInt(MAX_ASSIGN_PER_HEARTBEAT, DEFAULT_MAX_ASSIGN_PER_HEARTBEAT); } + + public static final String MAXIMUM_LIFETIME_SUFFIX = + "maximum-application-lifetime"; + + public static final String DEFAULT_LIFETIME_SUFFIX = + "default-application-lifetime"; + + public long getMaximumLifetimePerQueue(String queue) { + long maximumLifetimePerQueue = getLong( + getQueuePrefix(queue) + MAXIMUM_LIFETIME_SUFFIX, (long) UNDEFINED); + return maximumLifetimePerQueue; + } + + public void setMaximumLifetimePerQueue(String queue, long maximumLifetime) { + setLong(getQueuePrefix(queue) + MAXIMUM_LIFETIME_SUFFIX, maximumLifetime); + } + + public long getDefaultLifetimePerQueue(String queue) { + long 
maximumLifetimePerQueue = getLong( + getQueuePrefix(queue) + DEFAULT_LIFETIME_SUFFIX, (long) UNDEFINED); + return maximumLifetimePerQueue; + } + + public void setDefaultLifetimePerQueue(String queue, long defaultLifetime) { + setLong(getQueuePrefix(queue) + DEFAULT_LIFETIME_SUFFIX, defaultLifetime); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index d15431e77e7..f24e30aa1ee 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; @@ -130,6 +131,10 @@ public class LeafQueue extends AbstractCSQueue { List priorityAcls = new ArrayList(); + // -1 indicates lifetime is disabled + private volatile long maxApplicationLifetime = -1; + private volatile long defaultApplicationLifetime = -1; + @SuppressWarnings({ "unchecked", "rawtypes" }) public LeafQueue(CapacitySchedulerContext cs, String queueName, CSQueue parent, CSQueue old) throws IOException { @@ -238,6 +243,18 @@ protected void setupQueueConfigs(Resource clusterResource) defaultAppPriorityPerQueue = Priority.newInstance( conf.getDefaultApplicationPriorityConfPerQueue(getQueuePath())); + maxApplicationLifetime = + conf.getMaximumLifetimePerQueue((getQueuePath())); + defaultApplicationLifetime = + conf.getDefaultLifetimePerQueue((getQueuePath())); + if (defaultApplicationLifetime > maxApplicationLifetime) { + throw new YarnRuntimeException( + "Default lifetime" + defaultApplicationLifetime + + " can't exceed maximum lifetime " + maxApplicationLifetime); + } + defaultApplicationLifetime = defaultApplicationLifetime > 0 + ? defaultApplicationLifetime : maxApplicationLifetime; + // Validate leaf queue's user's weights. 
int queueUL = Math.min(100, conf.getUserLimit(getQueuePath())); for (Entry e : getUserWeights().entrySet()) { @@ -293,7 +310,10 @@ protected void setupQueueConfigs(Resource clusterResource) + "reservationsContinueLooking = " + reservationsContinueLooking + "\n" + "preemptionDisabled = " + getPreemptionDisabled() + "\n" + "defaultAppPriorityPerQueue = " - + defaultAppPriorityPerQueue + "\npriority = " + priority); + + defaultAppPriorityPerQueue + "\npriority = " + priority + + "\nmaxLifetime = " + maxApplicationLifetime + " seconds" + + "\ndefaultLifetime = " + + defaultApplicationLifetime + " seconds"); } finally { writeLock.unlock(); } @@ -1782,7 +1802,9 @@ public void recoverContainer(Resource clusterResource, if (rmContainer.getState().equals(RMContainerState.COMPLETED)) { return; } - + if (rmContainer.getExecutionType() != ExecutionType.GUARANTEED) { + return; + } // Careful! Locking order is important! try { writeLock.lock(); @@ -2086,4 +2108,12 @@ static class CachedUserLimit { this.userLimit = userLimit; } } + + public long getMaximumApplicationLifetime() { + return maxApplicationLifetime; + } + + public long getDefaultApplicationLifetime() { + return defaultApplicationLifetime; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index 2e48000c09b..6800b74f8d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueState; @@ -863,6 +864,9 @@ public void recoverContainer(Resource clusterResource, if (rmContainer.getState().equals(RMContainerState.COMPLETED)) { return; } + if (rmContainer.getExecutionType() != ExecutionType.GUARANTEED) { + return; + } // Careful! Locking order is important! 
try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java index 5f7d185c8d2..33f30b00412 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java @@ -731,7 +731,9 @@ partitionResource, getUsageRatio(nodePartition), * should be higher than queue-hard-limit * ulMin */ float usersSummedByWeight = activeUsersTimesWeights; - Resource resourceUsed = totalResUsageForActiveUsers.getUsed(nodePartition); + Resource resourceUsed = Resources.add( + totalResUsageForActiveUsers.getUsed(nodePartition), + required); // For non-activeUser calculation, consider all users count. if (!activeUser) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index f753d31fdbf..72dfbdd6dfb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -548,10 +548,7 @@ private ContainerAllocation assignContainer(Resource clusterResource, toKillContainers.add(killableContainer); Resources.addTo(availableAndKillable, killableContainer.getAllocatedResource()); - if (Resources.fitsIn(rc, - clusterResource, - capability, - availableAndKillable)) { + if (Resources.fitsIn(rc, capability, availableAndKillable)) { // Stop if we find enough spaces availableContainers = 1; break; @@ -579,8 +576,7 @@ private ContainerAllocation assignContainer(Resource clusterResource, // under the limit. 
resourceNeedToUnReserve = capability; } - unreservedContainer = - application.findNodeToUnreserve(clusterResource, node, + unreservedContainer = application.findNodeToUnreserve(node, schedulerKey, resourceNeedToUnReserve); // When (minimum-unreserved-resource > 0 OR we cannot allocate // new/reserved diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 17bb104605d..a12c5ec7f68 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -314,7 +314,6 @@ private boolean anyContainerInFinalState( } private boolean commonCheckContainerAllocation( - Resource cluster, ContainerAllocationProposal allocation, SchedulerContainer schedulerContainer) { // Make sure node is not reserved by anyone else @@ -355,8 +354,7 @@ private boolean commonCheckContainerAllocation( } } } - if (!Resources.fitsIn(rc, cluster, - allocation.getAllocatedOrReservedResource(), + if (!Resources.fitsIn(rc, allocation.getAllocatedOrReservedResource(), availableResource)) { if (LOG.isDebugEnabled()) { LOG.debug("Node doesn't have enough available resource, asked=" @@ -419,8 +417,7 @@ public boolean accept(Resource cluster, // Common part of check container allocation regardless if it is a // increase container or regular container - commonCheckContainerAllocation(cluster, allocation, - schedulerContainer); + commonCheckContainerAllocation(allocation, schedulerContainer); } else { // Container reserved first time will be NEW, after the container // accepted & confirmed, it will become RESERVED state @@ -721,9 +718,8 @@ public Allocation getAllocation(ResourceCalculator resourceCalculator, } @VisibleForTesting - public NodeId getNodeIdToUnreserve( - SchedulerRequestKey schedulerKey, Resource resourceNeedUnreserve, - ResourceCalculator rc, Resource clusterResource) { + public NodeId getNodeIdToUnreserve(SchedulerRequestKey schedulerKey, + Resource resourceNeedUnreserve, ResourceCalculator resourceCalculator) { // first go around make this algorithm simple and just grab first // reservation that has enough resources Map reservedContainers = this.reservedContainers.get( @@ -738,7 +734,7 @@ public NodeId getNodeIdToUnreserve( // make sure we unreserve one with at least the same amount of // resources, otherwise could affect capacity limits - if (Resources.fitsIn(rc, clusterResource, resourceNeedUnreserve, + if (Resources.fitsIn(resourceCalculator, resourceNeedUnreserve, reservedResource)) { if (LOG.isDebugEnabled()) { LOG.debug( @@ -806,14 +802,13 @@ public void reserve(SchedulerRequestKey schedulerKey, FiCaSchedulerNode node, } @VisibleForTesting - public RMContainer findNodeToUnreserve(Resource clusterResource, - FiCaSchedulerNode node, SchedulerRequestKey schedulerKey, - Resource minimumUnreservedResource) { + public RMContainer findNodeToUnreserve(FiCaSchedulerNode node, + SchedulerRequestKey schedulerKey, Resource minimumUnreservedResource) { try { 
readLock.lock(); // need to unreserve some other container first NodeId idToUnreserve = getNodeIdToUnreserve(schedulerKey, - minimumUnreservedResource, rc, clusterResource); + minimumUnreservedResource, rc); if (idToUnreserve == null) { if (LOG.isDebugEnabled()) { LOG.debug("checked to see if could unreserve for app but nothing " diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java index c26a11bf8af..7277779b1c7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java @@ -160,4 +160,17 @@ public synchronized Resource getTotalKillableResources() { public synchronized Map getKillableContainers() { return Collections.unmodifiableMap(killableContainers); } + + protected synchronized void allocateContainer(RMContainer rmContainer, + boolean launchedOnNode) { + super.allocateContainer(rmContainer, launchedOnNode); + + final Container container = rmContainer.getContainer(); + LOG.info("Assigned container " + container.getId() + " of capacity " + + container.getResource() + " on host " + getRMNode().getNodeAddress() + + ", which has " + getNumContainers() + " containers, " + + getAllocatedResource() + " used and " + getUnallocatedResource() + + " available after allocation"); + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java index 71e6f7fd7df..7bd69594856 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java @@ -30,7 +30,6 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.security.AccessType; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSchedulerConfiguration; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; @@ -51,7 +50,7 @@ public class AllocationConfiguration extends ReservationSchedulerConfiguration { // Maximum amount of resources for each queue's ad hoc children private final Map maxChildQueueResources; // Sharing weights for each queue - private final Map queueWeights; + private final Map queueWeights; // Max concurrent 
running applications for each queue and for each user; in addition, // for users that have no max specified, we use the userMaxJobsDefault. @@ -112,10 +111,12 @@ public class AllocationConfiguration extends ReservationSchedulerConfiguration { public AllocationConfiguration(Map minQueueResources, Map maxQueueResources, Map maxChildQueueResources, - Map queueMaxApps, Map userMaxApps, - Map queueWeights, + Map queueMaxApps, + Map userMaxApps, + Map queueWeights, Map queueMaxAMShares, int userMaxAppsDefault, - int queueMaxAppsDefault, Resource queueMaxResourcesDefault, + int queueMaxAppsDefault, + Resource queueMaxResourcesDefault, float queueMaxAMShareDefault, Map schedulingPolicies, SchedulingPolicy defaultSchedulingPolicy, @@ -253,9 +254,9 @@ public boolean isPreemptable(String queueName) { return !nonPreemptableQueues.contains(queueName); } - private ResourceWeights getQueueWeight(String queue) { - ResourceWeights weight = queueWeights.get(queue); - return (weight == null) ? ResourceWeights.NEUTRAL : weight; + private float getQueueWeight(String queue) { + Float weight = queueWeights.get(queue); + return (weight == null) ? 1.0f : weight; } public int getUserMaxApps(String user) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java index 313a27ae378..4d918c1d6f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java @@ -45,7 +45,6 @@ import org.apache.hadoop.yarn.security.Permission; import org.apache.hadoop.yarn.security.PrivilegedEntity; import org.apache.hadoop.yarn.security.PrivilegedEntity.EntityType; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.util.Clock; @@ -232,7 +231,7 @@ public synchronized void reloadAllocations() throws IOException, Map queueMaxApps = new HashMap<>(); Map userMaxApps = new HashMap<>(); Map queueMaxAMShares = new HashMap<>(); - Map queueWeights = new HashMap<>(); + Map queueWeights = new HashMap<>(); Map queuePolicies = new HashMap<>(); Map minSharePreemptionTimeouts = new HashMap<>(); Map fairSharePreemptionTimeouts = new HashMap<>(); @@ -454,7 +453,7 @@ private void loadQueue(String parentName, Element element, Map queueMaxApps, Map userMaxApps, Map queueMaxAMShares, - Map queueWeights, + Map queueWeights, Map queuePolicies, Map minSharePreemptionTimeouts, Map fairSharePreemptionTimeouts, @@ -522,7 +521,7 @@ private void loadQueue(String parentName, Element element, } else if ("weight".equals(field.getTagName())) { String text = ((Text)field.getFirstChild()).getData().trim(); double val = Double.parseDouble(text); - queueWeights.put(queueName, new ResourceWeights((float)val)); + queueWeights.put(queueName, (float)val); } else if 
("minSharePreemptionTimeout".equals(field.getTagName())) { String text = ((Text)field.getFirstChild()).getData().trim(); long val = Long.parseLong(text) * 1000L; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index fe57d1b9c3a..30245586a0d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -43,7 +43,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; @@ -75,7 +74,6 @@ public class FSAppAttempt extends SchedulerApplicationAttempt private final long startTime; private final Priority appPriority; - private final ResourceWeights resourceWeights; private Resource demand = Resources.createResource(0); private final FairScheduler scheduler; private Resource fairShare = Resources.createResource(0, 0); @@ -120,11 +118,6 @@ public FSAppAttempt(FairScheduler scheduler, this.startTime = scheduler.getClock().getTime(); this.lastTimeAtFairShare = this.startTime; this.appPriority = Priority.newInstance(1); - this.resourceWeights = new ResourceWeights(); - } - - ResourceWeights getResourceWeights() { - return resourceWeights; } /** @@ -614,9 +607,16 @@ boolean canContainerBePreempted(RMContainer container) { // Check if the app's allocation will be over its fairshare even // after preempting this container - Resource usageAfterPreemption = Resources.subtract( - getResourceUsage(), container.getAllocatedResource()); + Resource usageAfterPreemption = Resources.clone(getResourceUsage()); + // Subtract resources of containers already queued for preemption + synchronized (preemptionVariablesLock) { + Resources.subtractFrom(usageAfterPreemption, resourcesToBePreempted); + } + + // Subtract this container's allocation to compute usage after preemption + Resources.subtractFrom( + usageAfterPreemption, container.getAllocatedResource()); return !isUsageBelowShare(usageAfterPreemption, getFairShare()); } @@ -1270,17 +1270,11 @@ public Resource getMaxShare() { @Override public Resource getResourceUsage() { - // Subtract copies the object, so that we have a snapshot, - // in case usage changes, while the caller is using the value - synchronized (preemptionVariablesLock) { - return containersToBePreempted.isEmpty() - ? 
getCurrentConsumption() - : Resources.subtract(getCurrentConsumption(), resourcesToBePreempted); - } + return getCurrentConsumption(); } @Override - public ResourceWeights getWeights() { + public float getWeight() { return scheduler.getAppWeight(this); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index b911a1ae71a..1dcfffcd1d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -37,7 +37,6 @@ import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils; @@ -553,7 +552,7 @@ public void recoverContainer(Resource clusterResource, * @param weight queue weight */ public void setWeights(float weight) { - this.weights = new ResourceWeights(weight); + this.weights = weight; } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java index 10168232a94..8ae3cb69496 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java @@ -42,7 +42,6 @@ import org.apache.hadoop.yarn.security.PrivilegedEntity; import org.apache.hadoop.yarn.security.PrivilegedEntity.EntityType; import org.apache.hadoop.yarn.security.YarnAuthorizationProvider; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.util.resource.Resources; @@ -70,7 +69,7 @@ public abstract class FSQueue implements Queue, Schedulable { protected SchedulingPolicy policy = SchedulingPolicy.DEFAULT_POLICY; - protected ResourceWeights weights; + protected float weights; protected Resource minShare; protected Resource maxShare; protected int maxRunningApps; @@ -140,12 +139,12 @@ public void setPolicy(SchedulingPolicy policy) { this.policy = policy; } - public void setWeights(ResourceWeights weights){ + public void setWeights(float weights) { this.weights = weights; } @Override - public 
ResourceWeights getWeights() { + public float getWeight() { return weights; } @@ -439,7 +438,7 @@ public boolean isActive() { @Override public String toString() { return String.format("[%s, demand=%s, running=%s, share=%s, w=%s]", - getName(), getDemand(), getResourceUsage(), fairShare, getWeights()); + getName(), getDemand(), getResourceUsage(), fairShare, getWeight()); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java index 93646f47ca6..44ec9c3bc80 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java @@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; @@ -239,6 +240,15 @@ Set getContainersForPreemption() { protected synchronized void allocateContainer(RMContainer rmContainer, boolean launchedOnNode) { super.allocateContainer(rmContainer, launchedOnNode); + if (LOG.isDebugEnabled()) { + final Container container = rmContainer.getContainer(); + LOG.debug("Assigned container " + container.getId() + " of capacity " + + container.getResource() + " on host " + getRMNode().getNodeAddress() + + ", which has " + getNumContainers() + " containers, " + + getAllocatedResource() + " used and " + getUnallocatedResource() + + " available after allocation"); + } + Resource allocated = rmContainer.getAllocatedResource(); if (!Resources.isNone(allocated)) { // check for satisfied preemption request and update bookkeeping diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index c5212501b34..9cf2b2e764d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -54,7 +54,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMCriticalThreadUncaughtExceptionHandler; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationConstants; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; 
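The hunks above and below replace the per-resource ResourceWeights vector with a single float weight on FSQueue, FSAppAttempt and Schedulable, and FairScheduler.getAppWeight now returns that float directly. As a rough standalone illustration of the model (the class and method names here are invented for the sketch, they are not Hadoop APIs), a single float per schedulable is enough to drive a proportional split, and the optional size-based app weight reduces to the log1p expression kept by the patch:

import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Standalone sketch (not the Hadoop classes) of the float-based weight model:
 * each schedulable carries one float weight, and fair shares are split in
 * proportion to those weights.
 */
public class FloatWeightSketch {

  /** Split a total capacity among queues in proportion to their weights. */
  static Map<String, Double> weightedShares(Map<String, Float> weights,
      long totalCapacity) {
    double weightSum = weights.values().stream().mapToDouble(w -> w).sum();
    Map<String, Double> shares = new LinkedHashMap<>();
    for (Map.Entry<String, Float> e : weights.entrySet()) {
      shares.put(e.getKey(), totalCapacity * e.getValue() / weightSum);
    }
    return shares;
  }

  /**
   * Size-based app weight in the spirit of the patched getAppWeight():
   * log1p(memory demand) / log(2), scaled by the app priority.
   */
  static float sizeBasedAppWeight(long memoryDemandMb, int priority) {
    double weight = Math.log1p(memoryDemandMb) / Math.log(2);
    return (float) weight * priority;
  }

  public static void main(String[] args) {
    Map<String, Float> weights = new LinkedHashMap<>();
    weights.put("root.analytics", 2.0f);  // twice the share of a 1.0f queue
    weights.put("root.default", 1.0f);
    weights.put("root.adhoc", 0.5f);
    // weights 2.0 : 1.0 : 0.5 of a 7000 MB pool -> 4000, 2000, 1000
    System.out.println(weightedShares(weights, 7000));

    System.out.println(sizeBasedAppWeight(4096, 1));
  }
}

With weights 2.0, 1.0 and 0.5 the 7000 MB pool splits 4000 / 2000 / 1000, which matches the "a weight of 2.0f has twice the weight of a weight of 1.0f" behaviour documented on the new Schedulable.getWeight() further below.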
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; @@ -151,24 +150,14 @@ public class FairScheduler extends // reserved public static final Resource CONTAINER_RESERVED = Resources.createResource(-1); - // How often fair shares are re-calculated (ms) - protected long updateInterval; private final int UPDATE_DEBUG_FREQUENCY = 25; private int updatesToSkipForDebug = UPDATE_DEBUG_FREQUENCY; - @VisibleForTesting - Thread updateThread; - - private final Object updateThreadMonitor = new Object(); - @VisibleForTesting Thread schedulingThread; Thread preemptionThread; - // timeout to join when we stop this service - protected final long THREAD_JOIN_TIMEOUT_MS = 1000; - // Aggregate metrics FSQueueMetrics rootMetrics; FSOpDurations fsOpDurations; @@ -292,40 +281,6 @@ public QueueManager getQueueManager() { return queueMgr; } - // Allows UpdateThread to start processing without waiting till updateInterval - void triggerUpdate() { - synchronized (updateThreadMonitor) { - updateThreadMonitor.notify(); - } - } - - /** - * Thread which calls {@link FairScheduler#update()} every - * updateInterval milliseconds. - */ - private class UpdateThread extends Thread { - - @Override - public void run() { - while (!Thread.currentThread().isInterrupted()) { - try { - synchronized (updateThreadMonitor) { - updateThreadMonitor.wait(updateInterval); - } - long start = getClock().getTime(); - update(); - long duration = getClock().getTime() - start; - fsOpDurations.addUpdateThreadRunDuration(duration); - } catch (InterruptedException ie) { - LOG.warn("Update thread interrupted. Exiting."); - return; - } catch (Exception e) { - LOG.error("Exception in fair scheduler UpdateThread", e); - } - } - } - } - /** * Thread which attempts scheduling resources continuously, * asynchronous to the node heartbeats. @@ -367,7 +322,10 @@ private void dumpSchedulerState() { * required resources per job. 
*/ @VisibleForTesting + @Override public void update() { + // Storing start time for fsOpDurations + long start = getClock().getTime(); FSQueue rootQueue = queueMgr.getRootQueue(); // Update demands and fairshares @@ -402,6 +360,7 @@ public void update() { } finally { readLock.unlock(); } + fsOpDurations.addUpdateThreadRunDuration(getClock().getTime() - start); } public RMContainerTokenSecretManager @@ -409,7 +368,7 @@ public void update() { return rmContext.getContainerTokenSecretManager(); } - public ResourceWeights getAppWeight(FSAppAttempt app) { + public float getAppWeight(FSAppAttempt app) { try { readLock.lock(); double weight = 1.0; @@ -417,14 +376,10 @@ public ResourceWeights getAppWeight(FSAppAttempt app) { // Set weight based on current memory demand weight = Math.log1p(app.getDemand().getMemorySize()) / Math.log(2); } - weight *= app.getPriority().getPriority(); - ResourceWeights resourceWeights = app.getResourceWeights(); - resourceWeights.setWeight((float) weight); - return resourceWeights; + return (float)weight * app.getPriority().getPriority(); } finally { readLock.unlock(); } - } public Resource getIncrementResourceCapability() { @@ -1298,8 +1253,8 @@ private void initScheduler(Configuration conf) throws IOException { this.conf = new FairSchedulerConfiguration(conf); validateConf(this.conf); authorizer = YarnAuthorizationProvider.getInstance(conf); - minimumAllocation = this.conf.getMinimumAllocation(); - initMaximumResourceCapability(this.conf.getMaximumAllocation()); + minimumAllocation = super.getMinimumAllocation(); + initMaximumResourceCapability(super.getMaximumAllocation()); incrAllocation = this.conf.getIncrementAllocation(); updateReservationThreshold(); continuousSchedulingEnabled = this.conf.isContinuousSchedulingEnabled(); @@ -1320,7 +1275,7 @@ private void initScheduler(Configuration conf) throws IOException { updateInterval = FairSchedulerConfiguration.DEFAULT_UPDATE_INTERVAL_MS; LOG.warn(FairSchedulerConfiguration.UPDATE_INTERVAL_MS + " is invalid, so using default value " - + +FairSchedulerConfiguration.DEFAULT_UPDATE_INTERVAL_MS + + FairSchedulerConfiguration.DEFAULT_UPDATE_INTERVAL_MS + " ms instead"); } @@ -1339,12 +1294,6 @@ private void initScheduler(Configuration conf) throws IOException { throw new IOException("Failed to start FairScheduler", e); } - updateThread = new UpdateThread(); - updateThread.setName("FairSchedulerUpdateThread"); - updateThread.setUncaughtExceptionHandler( - new RMCriticalThreadUncaughtExceptionHandler(rmContext)); - updateThread.setDaemon(true); - if (continuousSchedulingEnabled) { // start continuous scheduling thread schedulingThread = new ContinuousSchedulingThread(); @@ -1391,9 +1340,7 @@ private void updateReservationThreshold() { private void startSchedulerThreads() { try { writeLock.lock(); - Preconditions.checkNotNull(updateThread, "updateThread is null"); Preconditions.checkNotNull(allocsLoader, "allocsLoader is null"); - updateThread.start(); if (continuousSchedulingEnabled) { Preconditions.checkNotNull(schedulingThread, "schedulingThread is null"); @@ -1424,10 +1371,6 @@ public void serviceStart() throws Exception { public void serviceStop() throws Exception { try { writeLock.lock(); - if (updateThread != null) { - updateThread.interrupt(); - updateThread.join(THREAD_JOIN_TIMEOUT_MS); - } if (continuousSchedulingEnabled) { if (schedulingThread != null) { schedulingThread.interrupt(); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java index fcdc056577e..4d6af982ae3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java @@ -22,7 +22,6 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; /** * A Schedulable represents an entity that can be scheduled such as an @@ -72,8 +71,15 @@ public interface Schedulable { /** Maximum Resource share assigned to the schedulable. */ Resource getMaxShare(); - /** Job/queue weight in fair sharing. */ - ResourceWeights getWeights(); + /** + * Job/queue weight in fair sharing. Weights are only meaningful when + * compared. A weight of 2.0f has twice the weight of a weight of 1.0f, + * which has twice the weight of a weight of 0.5f. A weight of 1.0f is + * considered unweighted or a neutral weight. A weight of 0 is no weight. + * + * @return the weight + */ + float getWeight(); /** Start time for jobs in FIFO queues; meaningless for QueueSchedulables.*/ long getStartTime(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java index 440c73cefdd..0a21b026714 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java @@ -21,7 +21,6 @@ import java.util.Collection; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable; @@ -47,7 +46,7 @@ public class ComputeFairShares { */ public static void computeShares( Collection schedulables, Resource totalResources, - ResourceType type) { + String type) { computeSharesInternal(schedulables, totalResources, type, false); } @@ -62,7 +61,7 @@ public static void computeShares( */ public static void computeSteadyShares( Collection queues, Resource totalResources, - ResourceType type) { + String type) { computeSharesInternal(queues, totalResources, type, true); } @@ -110,9 +109,9 @@ public static void computeSteadyShares( */ private static void computeSharesInternal( Collection 
allSchedulables, - Resource totalResources, ResourceType type, boolean isSteadyShare) { + Resource totalResources, String type, boolean isSteadyShare) { - Collection schedulables = new ArrayList(); + Collection schedulables = new ArrayList<>(); int takenResources = handleFixedFairShares( allSchedulables, schedulables, isSteadyShare, type); @@ -124,7 +123,7 @@ private static void computeSharesInternal( // have met all Schedulables' max shares. int totalMaxShare = 0; for (Schedulable sched : schedulables) { - long maxShare = getResourceValue(sched.getMaxShare(), type); + long maxShare = sched.getMaxShare().getResourceValue(type); totalMaxShare = (int) Math.min(maxShare + (long)totalMaxShare, Integer.MAX_VALUE); if (totalMaxShare == Integer.MAX_VALUE) { @@ -132,7 +131,7 @@ private static void computeSharesInternal( } } - long totalResource = Math.max((getResourceValue(totalResources, type) - + long totalResource = Math.max((totalResources.getResourceValue(type) - takenResources), 0); totalResource = Math.min(totalMaxShare, totalResource); @@ -159,13 +158,15 @@ private static void computeSharesInternal( } // Set the fair shares based on the value of R we've converged to for (Schedulable sched : schedulables) { + Resource target; + if (isSteadyShare) { - setResourceValue(computeShare(sched, right, type), - ((FSQueue) sched).getSteadyFairShare(), type); + target = ((FSQueue) sched).getSteadyFairShare(); } else { - setResourceValue( - computeShare(sched, right, type), sched.getFairShare(), type); + target = sched.getFairShare(); } + + target.setResourceValue(type, (long)computeShare(sched, right, type)); } } @@ -174,7 +175,7 @@ private static void computeSharesInternal( * w2rRatio, for use in the computeFairShares algorithm as described in # */ private static int resourceUsedWithWeightToResourceRatio(double w2rRatio, - Collection schedulables, ResourceType type) { + Collection schedulables, String type) { int resourcesTaken = 0; for (Schedulable sched : schedulables) { int share = computeShare(sched, w2rRatio, type); @@ -188,10 +189,10 @@ private static int resourceUsedWithWeightToResourceRatio(double w2rRatio, * weight-to-resource ratio w2rRatio. */ private static int computeShare(Schedulable sched, double w2rRatio, - ResourceType type) { - double share = sched.getWeights().getWeight(type) * w2rRatio; - share = Math.max(share, getResourceValue(sched.getMinShare(), type)); - share = Math.min(share, getResourceValue(sched.getMaxShare(), type)); + String type) { + double share = sched.getWeight() * w2rRatio; + share = Math.max(share, sched.getMinShare().getResourceValue(type)); + share = Math.min(share, sched.getMaxShare().getResourceValue(type)); return (int) share; } @@ -203,7 +204,7 @@ private static int computeShare(Schedulable sched, double w2rRatio, private static int handleFixedFairShares( Collection schedulables, Collection nonFixedSchedulables, - boolean isSteadyShare, ResourceType type) { + boolean isSteadyShare, String type) { int totalResource = 0; for (Schedulable sched : schedulables) { @@ -211,11 +212,15 @@ private static int handleFixedFairShares( if (fixedShare < 0) { nonFixedSchedulables.add(sched); } else { - setResourceValue(fixedShare, - isSteadyShare - ? 
((FSQueue)sched).getSteadyFairShare() - : sched.getFairShare(), - type); + Resource target; + + if (isSteadyShare) { + target = ((FSQueue)sched).getSteadyFairShare(); + } else { + target = sched.getFairShare(); + } + + target.setResourceValue(type, fixedShare); totalResource = (int) Math.min((long)totalResource + (long)fixedShare, Integer.MAX_VALUE); } @@ -230,10 +235,10 @@ private static int handleFixedFairShares( * or the Schedulable is not active for instantaneous fairshare. */ private static long getFairShareIfFixed(Schedulable sched, - boolean isSteadyShare, ResourceType type) { + boolean isSteadyShare, String type) { // Check if maxShare is 0 - if (getResourceValue(sched.getMaxShare(), type) <= 0) { + if (sched.getMaxShare().getResourceValue(type) <= 0) { return 0; } @@ -244,35 +249,11 @@ private static long getFairShareIfFixed(Schedulable sched, } // Check if weight is 0 - if (sched.getWeights().getWeight(type) <= 0) { - long minShare = getResourceValue(sched.getMinShare(), type); + if (sched.getWeight() <= 0) { + long minShare = sched.getMinShare().getResourceValue(type); return (minShare <= 0) ? 0 : minShare; } return -1; } - - private static long getResourceValue(Resource resource, ResourceType type) { - switch (type) { - case MEMORY: - return resource.getMemorySize(); - case CPU: - return resource.getVirtualCores(); - default: - throw new IllegalArgumentException("Invalid resource"); - } - } - - private static void setResourceValue(long val, Resource resource, ResourceType type) { - switch (type) { - case MEMORY: - resource.setMemorySize(val); - break; - case CPU: - resource.setVirtualCores((int)val); - break; - default: - throw new IllegalArgumentException("Invalid resource"); - } - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java index 72377b0c096..e58b3572968 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies; +import java.util.Arrays; import java.util.Collection; import java.util.Comparator; @@ -25,18 +26,15 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSContext; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingPolicy; - 
import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; - -import static org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType.*; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; /** * Makes scheduling decisions by trying to equalize dominant resource usage. @@ -72,16 +70,18 @@ public ResourceCalculator getResourceCalculator() { @Override public void computeShares(Collection schedulables, Resource totalResources) { - for (ResourceType type : ResourceType.values()) { - ComputeFairShares.computeShares(schedulables, totalResources, type); + for (ResourceInformation info: ResourceUtils.getResourceTypesArray()) { + ComputeFairShares.computeShares(schedulables, totalResources, + info.getName()); } } @Override public void computeSteadyShares(Collection queues, Resource totalResources) { - for (ResourceType type : ResourceType.values()) { - ComputeFairShares.computeSteadyShares(queues, totalResources, type); + for (ResourceInformation info: ResourceUtils.getResourceTypesArray()) { + ComputeFairShares.computeSteadyShares(queues, totalResources, + info.getName()); } } @@ -110,9 +110,13 @@ public void initialize(FSContext fsContext) { COMPARATOR.setFSContext(fsContext); } - public static class DominantResourceFairnessComparator implements Comparator { - private static final int NUM_RESOURCES = ResourceType.values().length; - + /** + * This class compares two {@link Schedulable} instances according to the + * DRF policy. If neither instance is below min share, approximate fair share + * ratios are compared. + */ + public static class DominantResourceFairnessComparator + implements Comparator { private FSContext fsContext; public void setFSContext(FSContext fsContext) { @@ -121,89 +125,199 @@ public void setFSContext(FSContext fsContext) { @Override public int compare(Schedulable s1, Schedulable s2) { - ResourceWeights sharesOfCluster1 = new ResourceWeights(); - ResourceWeights sharesOfCluster2 = new ResourceWeights(); - ResourceWeights sharesOfMinShare1 = new ResourceWeights(); - ResourceWeights sharesOfMinShare2 = new ResourceWeights(); - ResourceType[] resourceOrder1 = new ResourceType[NUM_RESOURCES]; - ResourceType[] resourceOrder2 = new ResourceType[NUM_RESOURCES]; + ResourceInformation[] info = ResourceUtils.getResourceTypesArray(); + Resource usage1 = s1.getResourceUsage(); + Resource usage2 = s2.getResourceUsage(); + Resource minShare1 = s1.getMinShare(); + Resource minShare2 = s2.getMinShare(); Resource clusterCapacity = fsContext.getClusterResource(); - // Calculate shares of the cluster for each resource both schedulables. - calculateShares(s1.getResourceUsage(), - clusterCapacity, sharesOfCluster1, resourceOrder1, s1.getWeights()); - calculateShares(s1.getResourceUsage(), - s1.getMinShare(), sharesOfMinShare1, null, ResourceWeights.NEUTRAL); - calculateShares(s2.getResourceUsage(), - clusterCapacity, sharesOfCluster2, resourceOrder2, s2.getWeights()); - calculateShares(s2.getResourceUsage(), - s2.getMinShare(), sharesOfMinShare2, null, ResourceWeights.NEUTRAL); - + // These arrays hold the usage, fair, and min share ratios for each + // resource type. ratios[0][x] are the usage ratios, ratios[1][x] are + // the fair share ratios, and ratios[2][x] are the min share ratios. 
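The comment above lays out the per-resource ratio rows the rewritten comparator sorts and compares. A minimal self-contained sketch of that bookkeeping, with invented names rather than the Hadoop comparator itself, indexed as ratios[resource][kind] like the code that follows, and using the worked numbers from the javadoc further down (a 100 MB / 10 vcore cluster, 10 MB / 5 vcore usage, weight 2):

import java.util.Arrays;

/**
 * Standalone sketch (illustrative only) of the ratio bookkeeping:
 *   ratios[i][0] = usage / cluster capacity   (usage ratio)
 *   ratios[i][1] = usage ratio / weight       (approximate fair-share ratio)
 * and the index of the dominant resource is the one with the largest
 * usage ratio.
 */
public class DrfRatioSketch {

  static int fillClusterAndFairRatios(long[] usage, long[] cluster,
      float[][] ratios, float weight) {
    int dominant = 0;
    for (int i = 0; i < cluster.length; i++) {
      ratios[i][0] = usage[i] / (float) cluster[i];
      if (ratios[i][0] > ratios[dominant][0]) {
        dominant = i;
      }
      // Dividing by a zero weight yields Infinity, so a zero-weight
      // schedulable naturally loses to any weighted one.
      ratios[i][1] = ratios[i][0] / weight;
    }
    return dominant;
  }

  public static void main(String[] args) {
    long[] cluster = {100, 10};   // 100 MB, 10 vcores
    long[] usage = {10, 5};       // 10 MB, 5 vcores
    float[][] ratios = new float[2][3];

    int dominant = fillClusterAndFairRatios(usage, cluster, ratios, 2.0f);
    // usage ratios 0.1 and 0.5 -> vcores (index 1) dominate;
    // fair-share ratios with weight 2 are 0.05 and 0.25
    System.out.println("dominant index = " + dominant);
    System.out.println(Arrays.deepToString(ratios));
  }
}

Sorting each set of rows by the usage ratio and then comparing the fair-share ratios column by column is what sortRatios() and compareRatios(..., 1) do in the code below.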
+ float[][] ratios1 = new float[info.length][3]; + float[][] ratios2 = new float[info.length][3]; + + // Calculate cluster shares and approximate fair shares for each + // resource type of both schedulables. + int dominant1 = calculateClusterAndFairRatios(usage1, clusterCapacity, + ratios1, s1.getWeight()); + int dominant2 = calculateClusterAndFairRatios(usage2, clusterCapacity, + ratios2, s2.getWeight()); + // A queue is needy for its min share if its dominant resource - // (with respect to the cluster capacity) is below its configured min share - // for that resource - boolean s1Needy = sharesOfMinShare1.getWeight(resourceOrder1[0]) < 1.0f; - boolean s2Needy = sharesOfMinShare2.getWeight(resourceOrder2[0]) < 1.0f; + // (with respect to the cluster capacity) is below its configured min + // share for that resource + boolean s1Needy = + usage1.getResources()[dominant1].getValue() < + minShare1.getResources()[dominant1].getValue(); + boolean s2Needy = + usage2.getResources()[dominant2].getValue() < + minShare2.getResources()[dominant2].getValue(); int res = 0; + if (!s2Needy && !s1Needy) { - res = compareShares(sharesOfCluster1, sharesOfCluster2, - resourceOrder1, resourceOrder2); + // Sort shares by usage ratio and compare them by approximate fair share + // ratio + sortRatios(ratios1, ratios2); + res = compareRatios(ratios1, ratios2, 1); } else if (s1Needy && !s2Needy) { res = -1; } else if (s2Needy && !s1Needy) { res = 1; } else { // both are needy below min share - res = compareShares(sharesOfMinShare1, sharesOfMinShare2, - resourceOrder1, resourceOrder2); + // Calculate the min share ratios, then sort by usage ratio, and compare + // by min share ratio + calculateMinShareRatios(usage1, minShare1, ratios1); + calculateMinShareRatios(usage2, minShare2, ratios2); + sortRatios(ratios1, ratios2); + res = compareRatios(ratios1, ratios2, 2); } + if (res == 0) { // Apps are tied in fairness ratio. Break the tie by submit time and job // name to get a deterministic ordering, which is useful for unit tests. res = (int) Math.signum(s1.getStartTime() - s2.getStartTime()); + if (res == 0) { res = s1.getName().compareTo(s2.getName()); } } + return res; } + + /** + * Sort both ratios arrays according to the usage ratios (the + * first index of the inner arrays, e.g. {@code ratios1[x][0]}). + * + * @param ratios1 the first ratios array + * @param ratios2 the second ratios array + */ + @VisibleForTesting + void sortRatios(float[][] ratios1, float[][]ratios2) { + // sort order descending by resource share + Arrays.sort(ratios1, (float[] o1, float[] o2) -> + (int) Math.signum(o2[0] - o1[0])); + Arrays.sort(ratios2, (float[] o1, float[] o2) -> + (int) Math.signum(o2[0] - o1[0])); + } + + /** + * Calculate a resource's usage ratio and approximate fair share ratio. + * The {@code shares} array will be populated with both the usage ratio + * and the approximate fair share ratio for each resource type. The usage + * ratio is calculated as {@code resource} divided by {@code cluster}. + * The approximate fair share ratio is calculated as the usage ratio + * divided by {@code weight}. If the cluster's resources are 100MB and + * 10 vcores, and the usage ({@code resource}) is 10 MB and 5 CPU, the + * usage ratios will be 0.1 and 0.5. If the weights are 2, the fair + * share ratios will be 0.05 and 0.25. + * + * The approximate fair share ratio is the usage divided by the + * approximate fair share, i.e. the cluster resources times the weight. 
+ * The approximate fair share is an acceptable proxy for the fair share + * because when comparing resources, the resource with the higher weight + * will be assigned by the scheduler a proportionally higher fair share. + * + * The {@code shares} array must be at least n x 2, where n + * is the number of resource types. Only the first and second indices of + * the inner arrays in the {@code shares} array will be used, e.g. + * {@code shares[x][0]} and {@code shares[x][1]}. + * + * The return value will be the index of the dominant resource type in the + * {@code shares} array. The dominant resource is the resource type for + * which {@code resource} has the largest usage ratio. + * + * @param resource the resource for which to calculate ratios + * @param cluster the total cluster resources + * @param ratios the shares array to populate + * @param weight the resource weight + * @return the index of the resource type with the largest cluster share + */ + @VisibleForTesting + int calculateClusterAndFairRatios(Resource resource, Resource cluster, + float[][] ratios, float weight) { + ResourceInformation[] resourceInfo = resource.getResources(); + ResourceInformation[] clusterInfo = cluster.getResources(); + int max = 0; + + for (int i = 0; i < clusterInfo.length; i++) { + // First calculate the cluster share + ratios[i][0] = + resourceInfo[i].getValue() / (float) clusterInfo[i].getValue(); + + // Use the cluster share to find the dominant resource + if (ratios[i][0] > ratios[max][0]) { + max = i; + } + + // Now divide by the weight to get the approximate fair share. + // It's OK if the weight is zero, because the floating point division + // will yield Infinity, i.e. this Schedulable will lose out to any + // other Schedulable with non-zero weight. + ratios[i][1] = ratios[i][0] / weight; + } + + return max; + } /** - * Calculates and orders a resource's share of a pool in terms of two vectors. - * The shares vector contains, for each resource, the fraction of the pool that - * it takes up. The resourceOrder vector contains an ordering of resources - * by largest share. So if resource=<10 MB, 5 CPU>, and pool=<100 MB, 10 CPU>, - * shares will be [.1, .5] and resourceOrder will be [CPU, MEMORY]. + * Calculate a resource's min share ratios. The {@code ratios} array will be + * populated with the {@code resource} divided by {@code minShare} for each + * resource type. If the min shares are 5 MB and 10 vcores, and the usage + * ({@code resource}) is 10 MB and 5 CPU, the ratios will be 2 and 0.5. + * + * The {@code ratios} array must be n x 3, where n is the + * number of resource types. Only the third index of the inner arrays in + * the {@code ratios} array will be used, e.g. {@code ratios[x][2]}. 
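For the branch where both schedulables sit below their min share, the javadoc above walks through min shares of 5 MB and 10 vcores against a usage of 10 MB and 5 vcores. A small standalone sketch of that computation and of the needy test used earlier in compare(), again with invented names and under the assumption that the third ratio slot holds usage divided by min share as described:

/**
 * Standalone sketch (invented names, not the Hadoop code): the third slot of
 * each ratio row holds usage / minShare, and a schedulable is "needy" when
 * its dominant resource is below its configured min share.
 */
public class MinShareRatioSketch {

  static void fillMinShareRatios(long[] usage, long[] minShare,
      float[][] ratios) {
    for (int i = 0; i < minShare.length; i++) {
      ratios[i][2] = usage[i] / (float) minShare[i];
    }
  }

  static boolean isNeedy(long[] usage, long[] minShare, int dominant) {
    return usage[dominant] < minShare[dominant];
  }

  public static void main(String[] args) {
    long[] usage = {10, 5};      // 10 MB, 5 vcores used
    long[] minShare = {5, 10};   // min share: 5 MB, 10 vcores
    float[][] ratios = new float[2][3];

    fillMinShareRatios(usage, minShare, ratios);
    // ratios[*][2] are 2.0 (10/5) and 0.5 (5/10), matching the example above
    System.out.println(ratios[0][2] + ", " + ratios[1][2]);

    // With vcores (index 1) as the dominant resource, 5 < 10, so needy
    System.out.println(isNeedy(usage, minShare, 1));
  }
}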
+ * + * @param resource the resource for which to calculate min shares + * @param minShare the min share + * @param ratios the shares array to populate */ @VisibleForTesting - void calculateShares(Resource resource, Resource pool, - ResourceWeights shares, ResourceType[] resourceOrder, ResourceWeights weights) { - shares.setWeight(MEMORY, (float)resource.getMemorySize() / - (pool.getMemorySize() * weights.getWeight(MEMORY))); - shares.setWeight(CPU, (float)resource.getVirtualCores() / - (pool.getVirtualCores() * weights.getWeight(CPU))); - // sort order vector by resource share - if (resourceOrder != null) { - if (shares.getWeight(MEMORY) > shares.getWeight(CPU)) { - resourceOrder[0] = MEMORY; - resourceOrder[1] = CPU; - } else { - resourceOrder[0] = CPU; - resourceOrder[1] = MEMORY; - } + void calculateMinShareRatios(Resource resource, Resource minShare, + float[][] ratios) { + ResourceInformation[] resourceInfo = resource.getResources(); + ResourceInformation[] minShareInfo = minShare.getResources(); + + for (int i = 0; i < minShareInfo.length; i++) { + ratios[i][2] = + resourceInfo[i].getValue() / (float) minShareInfo[i].getValue(); } } - - private int compareShares(ResourceWeights shares1, ResourceWeights shares2, - ResourceType[] resourceOrder1, ResourceType[] resourceOrder2) { - for (int i = 0; i < resourceOrder1.length; i++) { - int ret = (int)Math.signum(shares1.getWeight(resourceOrder1[i]) - - shares2.getWeight(resourceOrder2[i])); + + /** + * Compare the two ratios arrays and return -1, 0, or 1 if the first array + * is less than, equal to, or greater than the second array, respectively. + * The {@code index} parameter determines which index of the inner arrays + * will be used for the comparisons. 0 is for usage ratios, 1 is for + * fair share ratios, and 2 is for the min share ratios. The ratios arrays + * are assumed to be sorted in descending order by usage ratio. + * + * @param ratios1 the first shares array + * @param ratios2 the second shares array + * @param index the outer index of the ratios arrays to compare. 
0 is for + * usage ratio, 1 is for approximate fair share ratios, and 1 is for min + * share ratios + * @return -1, 0, or 1 if the first array is less than, equal to, or + * greater than the second array, respectively + */ + @VisibleForTesting + int compareRatios(float[][] ratios1, float[][] ratios2, int index) { + int ret = 0; + + for (int i = 0; i < ratios1.length; i++) { + ret = (int) Math.signum(ratios1[i][index] - ratios2[i][index]); + if (ret != 0) { - return ret; + break; } } - return 0; + + return ret; } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java index 0ef90a1d72f..8179aa75033 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java @@ -26,7 +26,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingPolicy; @@ -42,9 +42,10 @@ @Private @Unstable public class FairSharePolicy extends SchedulingPolicy { - private static final Log LOG = LogFactory.getLog(FairSharePolicy.class); @VisibleForTesting public static final String NAME = "fair"; + private static final Log LOG = LogFactory.getLog(FairSharePolicy.class); + private static final String MEMORY = ResourceInformation.MEMORY_MB.getName(); private static final DefaultResourceCalculator RESOURCE_CALCULATOR = new DefaultResourceCalculator(); private static final FairShareComparator COMPARATOR = @@ -164,10 +165,11 @@ private int compareMinShareUsage(Schedulable s1, Schedulable s2, */ private int compareFairShareUsage(Schedulable s1, Schedulable s2, Resource resourceUsage1, Resource resourceUsage2) { - double weight1 = s1.getWeights().getWeight(ResourceType.MEMORY); - double weight2 = s2.getWeights().getWeight(ResourceType.MEMORY); + double weight1 = s1.getWeight(); + double weight2 = s2.getWeight(); double useToWeightRatio1; double useToWeightRatio2; + if (weight1 > 0.0 && weight2 > 0.0) { useToWeightRatio1 = resourceUsage1.getMemorySize() / weight1; useToWeightRatio2 = resourceUsage2.getMemorySize() / weight2; @@ -213,14 +215,13 @@ public Resource getHeadroom(Resource queueFairShare, @Override public void computeShares(Collection schedulables, Resource totalResources) { - ComputeFairShares.computeShares(schedulables, totalResources, ResourceType.MEMORY); + ComputeFairShares.computeShares(schedulables, totalResources, MEMORY); } @Override public void computeSteadyShares(Collection queues, Resource totalResources) { - 
ComputeFairShares.computeSteadyShares(queues, totalResources, - ResourceType.MEMORY); + ComputeFairShares.computeSteadyShares(queues, totalResources, MEMORY); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 94c7e166ff5..185d426d717 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -241,17 +241,8 @@ private synchronized void initScheduler(Configuration conf) { //Use ConcurrentSkipListMap because applications need to be ordered this.applications = new ConcurrentSkipListMap<>(); - this.minimumAllocation = - Resources.createResource(conf.getInt( - YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB)); - initMaximumResourceCapability( - Resources.createResource(conf.getInt( - YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB), - conf.getInt( - YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES))); + this.minimumAllocation = super.getMinimumAllocation(); + initMaximumResourceCapability(super.getMaximumAllocation()); this.usePortForNodeName = conf.getBoolean( YarnConfiguration.RM_SCHEDULER_INCLUDE_PORT_IN_NODE_NAME, YarnConfiguration.DEFAULT_RM_SCHEDULER_USE_PORT_FOR_NODE_NAME); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/RMDelegationTokenSecretManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/RMDelegationTokenSecretManager.java index 631ca9d2e5a..53cc4716ca6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/RMDelegationTokenSecretManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/RMDelegationTokenSecretManager.java @@ -34,6 +34,7 @@ import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; @@ -52,7 +53,7 @@ public class RMDelegationTokenSecretManager extends private static final Log LOG = LogFactory .getLog(RMDelegationTokenSecretManager.class); - protected final RMContext rmContext; + private final ResourceManager rm; /** * Create a secret manager @@ -73,7 +74,7 @@ public RMDelegationTokenSecretManager(long 
delegationKeyUpdateInterval, RMContext rmContext) { super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, delegationTokenRenewInterval, delegationTokenRemoverScanInterval); - this.rmContext = rmContext; + this.rm = rmContext.getResourceManager(); } @Override @@ -85,7 +86,7 @@ public RMDelegationTokenIdentifier createIdentifier() { protected void storeNewMasterKey(DelegationKey newKey) { try { LOG.info("storing master key with keyID " + newKey.getKeyId()); - rmContext.getStateStore().storeRMDTMasterKey(newKey); + rm.getRMContext().getStateStore().storeRMDTMasterKey(newKey); } catch (Exception e) { LOG.error("Error in storing master key with KeyID: " + newKey.getKeyId()); ExitUtil.terminate(1, e); @@ -96,7 +97,7 @@ protected void storeNewMasterKey(DelegationKey newKey) { protected void removeStoredMasterKey(DelegationKey key) { try { LOG.info("removing master key with keyID " + key.getKeyId()); - rmContext.getStateStore().removeRMDTMasterKey(key); + rm.getRMContext().getStateStore().removeRMDTMasterKey(key); } catch (Exception e) { LOG.error("Error in removing master key with KeyID: " + key.getKeyId()); ExitUtil.terminate(1, e); @@ -109,7 +110,8 @@ protected void storeNewToken(RMDelegationTokenIdentifier identifier, try { LOG.info("storing RMDelegation token with sequence number: " + identifier.getSequenceNumber()); - rmContext.getStateStore().storeRMDelegationToken(identifier, renewDate); + rm.getRMContext().getStateStore().storeRMDelegationToken(identifier, + renewDate); } catch (Exception e) { LOG.error("Error in storing RMDelegationToken with sequence number: " + identifier.getSequenceNumber()); @@ -123,7 +125,7 @@ protected void updateStoredToken(RMDelegationTokenIdentifier id, try { LOG.info("updating RMDelegation token with sequence number: " + id.getSequenceNumber()); - rmContext.getStateStore().updateRMDelegationToken(id, renewDate); + rm.getRMContext().getStateStore().updateRMDelegationToken(id, renewDate); } catch (Exception e) { LOG.error("Error in updating persisted RMDelegationToken" + " with sequence number: " + id.getSequenceNumber()); @@ -137,7 +139,7 @@ protected void removeStoredToken(RMDelegationTokenIdentifier ident) try { LOG.info("removing RMDelegation token with sequence number: " + ident.getSequenceNumber()); - rmContext.getStateStore().removeRMDelegationToken(ident); + rm.getRMContext().getStateStore().removeRMDelegationToken(ident); } catch (Exception e) { LOG.error("Error in removing RMDelegationToken with sequence number: " + ident.getSequenceNumber()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ContainerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ContainerPage.java index 2cd209b6eae..0204989fed1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ContainerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ContainerPage.java @@ -20,7 +20,6 @@ import static org.apache.hadoop.yarn.util.StringHelper.join; -import org.apache.hadoop.yarn.server.webapp.ContainerBlock; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.YarnWebParams; @@ -38,7 +37,7 @@ protected 
void preHead(Page.HTML<__> html) { @Override protected Class content() { - return ContainerBlock.class; + return RMContainerBlock.class; } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java index f6b1a943a60..806b6364099 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java @@ -19,17 +19,21 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import com.google.inject.Inject; +import java.util.Arrays; + /** * Provides an table with an overview of many cluster wide metrics and if * per user metrics are enabled it will show an overview of what the @@ -168,8 +172,8 @@ protected void render(Block html) { } } - - SchedulerInfo schedulerInfo=new SchedulerInfo(this.rm); + + SchedulerInfo schedulerInfo = new SchedulerInfo(this.rm); div.h3("Scheduler Metrics"). table("#schedulermetricsoverview"). @@ -186,7 +190,8 @@ protected void render(Block html) { tbody().$class("ui-widget-content"). tr(). td(String.valueOf(schedulerInfo.getSchedulerType())). - td(String.valueOf(schedulerInfo.getSchedulerResourceTypes())). + td(String.valueOf(Arrays.toString(ResourceUtils.getResourcesTypeInfo() + .toArray(new ResourceTypeInfo[0])))). td(schedulerInfo.getMinAllocation().toString()). td(schedulerInfo.getMaxAllocation().toString()). td(String.valueOf(schedulerInfo.getMaxClusterLevelAppPriority())). 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java index 82ddb54b70d..b26eb0951ed 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java @@ -23,18 +23,22 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI._ODD; import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH; +import java.io.IOException; import java.util.Collection; import java.util.List; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; @@ -61,7 +65,7 @@ public class RMAppAttemptBlock extends AppAttemptBlock{ @Inject RMAppAttemptBlock(ViewContext ctx, ResourceManager rm, Configuration conf) { - super(rm.getClientRMService(), ctx); + super(null, ctx); this.rm = rm; this.conf = conf; } @@ -275,4 +279,18 @@ protected void createTablesForAttemptMetrics(Block html) { createContainerLocalityTable(html); createResourceRequestsTable(html); } + + @Override + protected List getContainers( + final GetContainersRequest request) throws YarnException, IOException { + return rm.getClientRMService().getContainers(request).getContainerList(); + } + + @Override + protected ApplicationAttemptReport getApplicationAttemptReport( + final GetApplicationAttemptReportRequest request) + throws YarnException, IOException { + return rm.getClientRMService().getApplicationAttemptReport(request) + .getApplicationAttemptReport(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java index cd04264d07c..8553d8cde28 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java @@ -20,15 +20,23 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP; +import java.io.IOException; import java.util.Collection; +import java.util.List; import java.util.Set; import org.apache.commons.lang.StringEscapeUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.LogAggregationStatus; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics; @@ -36,6 +44,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppAttemptInfo; import org.apache.hadoop.yarn.server.webapp.AppBlock; +import org.apache.hadoop.yarn.util.StringHelper; import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.DIV; @@ -52,7 +61,7 @@ public class RMAppBlock extends AppBlock{ @Inject RMAppBlock(ViewContext ctx, Configuration conf, ResourceManager rm) { - super(rm.getClientRMService(), ctx, conf); + super(null, ctx, conf); this.conf = conf; this.rm = rm; } @@ -98,15 +107,12 @@ protected void createApplicationMetricsTable(Block html){ attemptResourcePreempted) .__("Number of Non-AM Containers Preempted from Current Attempt:", attemptNumNonAMContainerPreempted) - .__("Aggregate Resource Allocation:", - String.format("%d MB-seconds, %d vcore-seconds", - appMetrics == null ? "N/A" : appMetrics.getMemorySeconds(), - appMetrics == null ? "N/A" : appMetrics.getVcoreSeconds())) + .__("Aggregate Resource Allocation:", appMetrics == null ? "N/A" : + StringHelper + .getResourceSecondsString(appMetrics.getResourceSecondsMap())) .__("Aggregate Preempted Resource Allocation:", - String.format("%d MB-seconds, %d vcore-seconds", - appMetrics == null ? "N/A" : appMetrics.getPreemptedMemorySeconds(), - appMetrics == null ? "N/A" : - appMetrics.getPreemptedVcoreSeconds())); + appMetrics == null ? 
"N/A" : StringHelper.getResourceSecondsString( + appMetrics.getPreemptedResourceSecondsMap())); pdiv.__(); } @@ -187,4 +193,29 @@ protected LogAggregationStatus getLogAggregationStatus() { } return rmApp.getLogAggregationStatusForAppReport(); } + + @Override + protected ContainerReport getContainerReport( + final GetContainerReportRequest request) + throws YarnException, IOException { + return rm.getClientRMService().getContainerReport(request) + .getContainerReport(); + } + + @Override + protected List getApplicationAttemptsReport( + final GetApplicationAttemptsRequest request) + throws YarnException, IOException { + return rm.getClientRMService().getApplicationAttempts(request) + .getApplicationAttemptList(); + } + + @Override + protected ApplicationReport getApplicationReport( + final GetApplicationReportRequest request) + throws YarnException, IOException { + return rm.getClientRMService().getApplicationReport(request) + .getApplicationReport(); + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java index ede71e34378..d0dccab6e5c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java @@ -22,14 +22,17 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR_VALUE; +import java.io.IOException; +import java.util.List; import java.util.Set; import org.apache.commons.lang.StringEscapeUtils; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.yarn.api.ApplicationBaseProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; @@ -47,9 +50,8 @@ public class RMAppsBlock extends AppsBlock { private ResourceManager rm; @Inject - RMAppsBlock(ResourceManager rm, ApplicationBaseProtocol appBaseProt, - View.ViewContext ctx) { - super(appBaseProt, ctx); + RMAppsBlock(ResourceManager rm, View.ViewContext ctx) { + super(null, ctx); this.rm = rm; } @@ -193,4 +195,11 @@ protected void renderData(Block html) { tbody.__().__(); } + + @Override + protected List getApplicationReport( + final GetApplicationsRequest request) throws YarnException, IOException { + return rm.getClientRMService().getApplications(request) + .getApplicationList(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMContainerBlock.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMContainerBlock.java new file mode 100644 index 00000000000..f589a5dafba --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMContainerBlock.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.webapp; + +import java.io.IOException; + +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest; +import org.apache.hadoop.yarn.api.records.ContainerReport; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.webapp.ContainerBlock; + +import com.google.inject.Inject; + +public class RMContainerBlock extends ContainerBlock { + + private final ResourceManager rm; + + @Inject + public RMContainerBlock(ResourceManager resourceManager, ViewContext ctx) { + super(null, ctx); + this.rm = resourceManager; + } + + @Override + protected ContainerReport getContainerReport( + final GetContainerReportRequest request) + throws YarnException, IOException { + return rm.getClientRMService().getContainerReport(request) + .getContainerReport(); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java index 3367cf47334..4e36665267a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java @@ -23,7 +23,6 @@ import java.net.InetSocketAddress; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; -import org.apache.hadoop.yarn.api.ApplicationBaseProtocol; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.util.RMHAUtils; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -55,7 +54,6 @@ public void setup() { if (rm != null) { bind(ResourceManager.class).toInstance(rm); - bind(ApplicationBaseProtocol.class).toInstance(rm.getClientRMService()); } route("/", RmController.class); route(pajoin("/nodes", NODE_STATE), 
RmController.class, "nodes"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index 7ffe106f876..6dc3d9ab094 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -72,7 +72,12 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; @@ -95,10 +100,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationPriorityRequest; import org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType; +import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeLabel; @@ -223,7 +230,8 @@ public class RMWebServices extends WebServices implements RMWebServiceProtocol { @Inject public RMWebServices(final ResourceManager rm, Configuration conf) { - super(rm.getClientRMService()); + // don't inject, always take appBaseRoot from RM. 
+ super(null); this.rm = rm; this.conf = conf; isCentralizedNodeLabelConfiguration = @@ -2404,4 +2412,46 @@ public Void run() throws IOException, YarnException { app.getApplicationTimeouts().get(appTimeout.getTimeoutType())); return Response.status(Status.OK).entity(timeout).build(); } + + @Override + protected ApplicationReport getApplicationReport( + GetApplicationReportRequest request) throws YarnException, IOException { + return rm.getClientRMService().getApplicationReport(request) + .getApplicationReport(); + } + + @Override + protected List getApplicationsReport( + final GetApplicationsRequest request) throws YarnException, IOException { + return rm.getClientRMService().getApplications(request) + .getApplicationList(); + } + + @Override + protected ApplicationAttemptReport getApplicationAttemptReport( + GetApplicationAttemptReportRequest request) + throws YarnException, IOException { + return rm.getClientRMService().getApplicationAttemptReport(request) + .getApplicationAttemptReport(); + } + + @Override + protected List getApplicationAttemptsReport( + GetApplicationAttemptsRequest request) throws YarnException, IOException { + return rm.getClientRMService().getApplicationAttempts(request) + .getApplicationAttemptList(); + } + + @Override + protected ContainerReport getContainerReport( + GetContainerReportRequest request) throws YarnException, IOException { + return rm.getClientRMService().getContainerReport(request) + .getContainerReport(); + } + + @Override + protected List getContainersReport( + GetContainersRequest request) throws YarnException, IOException { + return rm.getClientRMService().getContainers(request).getContainerList(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java index 9fb8fb5858d..236c4677653 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java @@ -101,6 +101,7 @@ public class AppInfo { private long vcoreSeconds; protected float queueUsagePercentage; protected float clusterUsagePercentage; + protected Map resourceSecondsMap; // preemption info fields private long preemptedResourceMB; @@ -109,6 +110,7 @@ public class AppInfo { private int numAMContainerPreempted; private long preemptedMemorySeconds; private long preemptedVcoreSeconds; + protected Map preemptedResourceSecondsMap; // list of resource requests @XmlElement(name = "resourceRequests") @@ -236,8 +238,10 @@ public AppInfo(ResourceManager rm, RMApp app, Boolean hasAccess, appMetrics.getResourcePreempted().getVirtualCores(); memorySeconds = appMetrics.getMemorySeconds(); vcoreSeconds = appMetrics.getVcoreSeconds(); + resourceSecondsMap = appMetrics.getResourceSecondsMap(); preemptedMemorySeconds = appMetrics.getPreemptedMemorySeconds(); preemptedVcoreSeconds = appMetrics.getPreemptedVcoreSeconds(); + preemptedResourceSecondsMap = appMetrics.getPreemptedResourceSecondsMap(); ApplicationSubmissionContext appSubmissionContext = app.getApplicationSubmissionContext(); unmanagedApplication = 
appSubmissionContext.getUnmanagedAM(); @@ -415,6 +419,22 @@ public long getReservedVCores() { return this.reservedVCores; } + public long getPreemptedMB() { + return preemptedResourceMB; + } + + public long getPreemptedVCores() { + return preemptedResourceVCores; + } + + public int getNumNonAMContainersPreempted() { + return numNonAMContainerPreempted; + } + + public int getNumAMContainersPreempted() { + return numAMContainerPreempted; + } + public long getMemorySeconds() { return memorySeconds; } @@ -423,6 +443,10 @@ public long getVcoreSeconds() { return vcoreSeconds; } + public Map getResourceSecondsMap() { + return resourceSecondsMap; + } + public long getPreemptedMemorySeconds() { return preemptedMemorySeconds; } @@ -431,6 +455,10 @@ public long getPreemptedVcoreSeconds() { return preemptedVcoreSeconds; } + public Map getPreemptedResourceSecondsMap() { + return preemptedResourceSecondsMap; + } + public List getResourceRequests() { return this.resourceRequests; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java index 5083943b65a..e13980afc39 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java @@ -20,46 +20,68 @@ import javax.xml.bind.annotation.XmlAccessType; import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlRootElement; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.util.resource.Resources; @XmlRootElement -@XmlAccessorType(XmlAccessType.FIELD) +@XmlAccessorType(XmlAccessType.NONE) public class ResourceInfo { + + @XmlElement long memory; + @XmlElement int vCores; - + + private Resource resources; + public ResourceInfo() { } public ResourceInfo(Resource res) { memory = res.getMemorySize(); vCores = res.getVirtualCores(); + resources = Resources.clone(res); } public long getMemorySize() { - return memory; + if (resources == null) { + resources = Resource.newInstance(memory, vCores); + } + return resources.getMemorySize(); } public int getvCores() { - return vCores; + if (resources == null) { + resources = Resource.newInstance(memory, vCores); + } + return resources.getVirtualCores(); } - + @Override public String toString() { - return ""; + return resources.toString(); } public void setMemory(int memory) { + if (resources == null) { + resources = Resource.newInstance(memory, vCores); + } this.memory = memory; + resources.setMemorySize(memory); } public void setvCores(int vCores) { + if (resources == null) { + resources = Resource.newInstance(memory, vCores); + } this.vCores = vCores; + resources.setVirtualCores(vCores); } public Resource getResource() { - return Resource.newInstance(memory, vCores); + return Resource.newInstance(resources); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/SchedulerInfo.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/SchedulerInfo.java index cf93edd2c1e..81491b14ce1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/SchedulerInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/SchedulerInfo.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao; +import java.util.Arrays; import java.util.EnumSet; import javax.xml.bind.annotation.XmlRootElement; @@ -73,7 +74,7 @@ public ResourceInfo getMaxAllocation() { } public String getSchedulerResourceTypes() { - return this.schedulingResourceTypes.toString(); + return Arrays.toString(minAllocResource.getResource().getResources()); } public int getMaxClusterLevelAppPriority() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto index 247cd2195d9..39a56a811a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto @@ -87,6 +87,8 @@ message ApplicationAttemptStateDataProto { optional int64 finish_time = 12; optional int64 preempted_memory_seconds = 13; optional int64 preempted_vcore_seconds = 14; + repeated StringLongMapProto application_resource_usage_map = 15; + repeated StringLongMapProto preempted_resource_usage_map = 16; } message EpochProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 1235774d934..b772e80fcf1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -80,6 +80,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.NullRMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.resource.TestResourceProfiles; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; @@ -104,6 +105,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.YarnVersionInfo; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import 
org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -150,6 +152,10 @@ public MockRM(Configuration conf, RMStateStore store, public MockRM(Configuration conf, RMStateStore store, boolean useNullRMNodeLabelsManager, boolean useRealElector) { super(); + if (conf.getBoolean(TestResourceProfiles.TEST_CONF_RESET_RESOURCE_TYPES, + true)) { + ResourceUtils.resetResourceTypes(conf); + } this.useNullRMNodeLabelsManager = useNullRMNodeLabelsManager; this.useRealElector = useRealElector; init(conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf)); @@ -811,6 +817,8 @@ PrivilegedExceptionAction setClientReq( RMAppAttemptState.SCHEDULED); } + ((AbstractYarnScheduler)getResourceScheduler()).update(); + return rmApp; } @@ -934,6 +942,7 @@ public FailApplicationAttemptResponse failApplicationAttempt( public MockAM sendAMLaunched(ApplicationAttemptId appAttemptId) throws Exception { MockAM am = new MockAM(getRMContext(), masterService, appAttemptId); + ((AbstractYarnScheduler)scheduler).update(); waitForState(appAttemptId, RMAppAttemptState.ALLOCATED); //create and set AMRMToken Token amrmToken = @@ -1158,6 +1167,7 @@ public static MockAM launchAM(RMApp app, MockRM rm, MockNM nm) RMAppAttempt attempt = waitForAttemptScheduled(app, rm); LOG.info("Launch AM " + attempt.getAppAttemptId()); nm.nodeHeartbeat(true); + ((AbstractYarnScheduler)rm.getResourceScheduler()).update(); rm.drainEventsImplicitly(); MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId()); rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED); @@ -1173,6 +1183,7 @@ public static MockAM launchUAM(RMApp app, MockRM rm, MockNM nm) waitForSchedulerAppAttemptAdded(attempt.getAppAttemptId(), rm); LOG.info("Launch AM " + attempt.getAppAttemptId()); nm.nodeHeartbeat(true); + ((AbstractYarnScheduler)rm.getResourceScheduler()).update(); rm.drainEventsImplicitly(); MockAM am = new MockAM(rm.getRMContext(), rm.masterService, attempt.getAppAttemptId()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ParameterizedSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ParameterizedSchedulerTestBase.java index 00809f04f81..289ff1cebe2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ParameterizedSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ParameterizedSchedulerTestBase.java @@ -18,53 +18,74 @@ package org.apache.hadoop.yarn.server.resourcemanager; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration; -import org.junit.Before; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.io.File; import 
java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; +import java.util.Arrays; +import java.util.Collection; +import java.util.stream.Collectors; +@RunWith(Parameterized.class) public abstract class ParameterizedSchedulerTestBase { protected final static String TEST_DIR = new File(System.getProperty("test.build.data", "/tmp")).getAbsolutePath(); private final static String FS_ALLOC_FILE = new File(TEST_DIR, "test-fs-queues.xml").getAbsolutePath(); - private SchedulerType schedulerType; - private YarnConfiguration conf = null; - private AbstractYarnScheduler scheduler = null; - public enum SchedulerType { CAPACITY, FAIR } + @Parameterized.Parameters(name = "{0}") + public static Collection getParameters() { + return Arrays.stream(SchedulerType.values()).map( + type -> new Object[]{type}).collect(Collectors.toList()); + } + + private SchedulerType schedulerType; + private YarnConfiguration conf = null; + private AbstractYarnScheduler scheduler = null; + public YarnConfiguration getConf() { return conf; } - @Before - public void configureScheduler() throws IOException, ClassNotFoundException { + // Due to parameterization, this gets called before each test method + public ParameterizedSchedulerTestBase(SchedulerType type) + throws IOException { conf = new YarnConfiguration(); - Class schedulerClass = - conf.getClass(YarnConfiguration.RM_SCHEDULER, - Class.forName(YarnConfiguration.DEFAULT_RM_SCHEDULER)); + QueueMetrics.clearQueueMetrics(); + DefaultMetricsSystem.setMiniClusterMode(true); - if (schedulerClass == FairScheduler.class) { - schedulerType = SchedulerType.FAIR; - configureFairScheduler(conf); - scheduler = new FairScheduler(); - } else if (schedulerClass == CapacityScheduler.class) { - schedulerType = SchedulerType.CAPACITY; - scheduler = new CapacityScheduler(); - ((CapacityScheduler)scheduler).setConf(conf); + schedulerType = type; + switch (schedulerType) { + case FAIR: + configureFairScheduler(conf); + scheduler = new FairScheduler(); + conf.set(YarnConfiguration.RM_SCHEDULER, + FairScheduler.class.getName()); + break; + case CAPACITY: + scheduler = new CapacityScheduler(); + ((CapacityScheduler)scheduler).setConf(conf); + conf.set(YarnConfiguration.RM_SCHEDULER, + CapacityScheduler.class.getName()); + break; + default: + throw new IllegalArgumentException("Invalid type: " + type); } } @@ -85,7 +106,6 @@ private void configureFairScheduler(YarnConfiguration conf) throws IOException { out.println(""); out.close(); - conf.set(YarnConfiguration.RM_SCHEDULER, FairScheduler.class.getName()); conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, FS_ALLOC_FILE); conf.setLong(FairSchedulerConfiguration.UPDATE_INTERVAL_MS, 10); } @@ -97,7 +117,8 @@ public SchedulerType getSchedulerType() { /** * Return a scheduler configured by {@code YarnConfiguration.RM_SCHEDULER} * - *

The scheduler is configured by {@link #configureScheduler()}. + * The scheduler is configured by + * {@link #ParameterizedSchedulerTestBase(SchedulerType)}. * Client test code can obtain the scheduler with this getter method. * Schedulers supported by this class are {@link FairScheduler} or * {@link CapacityScheduler}.
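The ParameterizedSchedulerTestBase diff above replaces the @Before configureScheduler() step with JUnit 4 parameterization: each SchedulerType becomes a test parameter and the constructor performs the per-type scheduler setup, so subclasses such as TestRM and TestRMRestart now declare a matching constructor. A minimal, self-contained sketch of the pattern, with an illustrative class name rather than the real test base:

import java.util.Arrays;
import java.util.Collection;

import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parameterized.class)
public class SchedulerTypeParameterizedExample {

  public enum SchedulerType { CAPACITY, FAIR }

  @Parameterized.Parameters(name = "{0}")
  public static Collection<Object[]> getParameters() {
    // One Object[] per scheduler type; JUnit runs every @Test once per entry.
    return Arrays.asList(new Object[][]{{SchedulerType.CAPACITY}, {SchedulerType.FAIR}});
  }

  private final SchedulerType schedulerType;

  // Invoked before each test method for each parameter, taking over the role
  // of the removed @Before configuration step.
  public SchedulerTypeParameterizedExample(SchedulerType type) {
    this.schedulerType = type;
  }

  @Test
  public void schedulerTypeIsSet() {
    Assert.assertNotNull(schedulerType);
  }
}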
    diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java index 4d8b20d69fd..4ac4fc306b5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java @@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.junit.After; @@ -95,6 +96,7 @@ protected MockAM launchAM(RMApp app, MockRM rm, MockNM nm) throws Exception { RMAppAttempt attempt = app.getCurrentAppAttempt(); nm.nodeHeartbeat(true); + ((AbstractYarnScheduler)rm.getResourceScheduler()).update(); MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId()); am.registerAppAttempt(); rm.waitForState(app.getApplicationId(), RMAppState.RUNNING); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java index db26a875990..b24a309fc10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java @@ -35,6 +35,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.ConcurrentMap; import org.apache.commons.logging.Log; @@ -56,6 +57,7 @@ import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -247,6 +249,8 @@ public void setUp() { ResourceScheduler scheduler = mockResourceScheduler(); ((RMContextImpl)rmContext).setScheduler(scheduler); Configuration conf = new Configuration(); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + ((RMContextImpl) rmContext).setYarnConfiguration(conf); ApplicationMasterService masterService = new ApplicationMasterService(rmContext, scheduler); appMonitor = new TestRMAppManager(rmContext, @@ -827,9 +831,12 @@ public void testEscapeApplicationSummary() { when(app.getState()).thenReturn(RMAppState.RUNNING); 
when(app.getApplicationType()).thenReturn("MAPREDUCE"); when(app.getSubmitTime()).thenReturn(1000L); + Map resourceSecondsMap = new HashMap<>(); + resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L); + resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L); RMAppMetrics metrics = new RMAppMetrics(Resource.newInstance(1234, 56), - 10, 1, 16384, 64, 0, 0); + 10, 1, resourceSecondsMap, new HashMap<>()); when(app.getRMAppMetrics()).thenReturn(metrics); RMAppManager.ApplicationSummary.SummaryBuilder summary = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java index e684f3c1ad3..7a2e500e661 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.api.records.ContainerUpdateType; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.UpdateContainerRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -102,9 +103,11 @@ public void init(ApplicationMasterServiceContext amsContext, } @Override - public void registerApplicationMaster(ApplicationAttemptId - applicationAttemptId, RegisterApplicationMasterRequest request, - RegisterApplicationMasterResponse response) throws IOException { + public void registerApplicationMaster( + ApplicationAttemptId applicationAttemptId, + RegisterApplicationMasterRequest request, + RegisterApplicationMasterResponse response) + throws IOException, YarnException { nextProcessor.registerApplicationMaster( applicationAttemptId, request, response); } @@ -144,7 +147,8 @@ public void init(ApplicationMasterServiceContext amsContext, public void registerApplicationMaster( ApplicationAttemptId applicationAttemptId, RegisterApplicationMasterRequest request, - RegisterApplicationMasterResponse response) throws IOException { + RegisterApplicationMasterResponse response) + throws IOException, YarnException { beforeRegCount.incrementAndGet(); nextProcessor.registerApplicationMaster(applicationAttemptId, request, response); @@ -672,4 +676,38 @@ private void sentRMContainerLaunched(MockRM rm, ContainerId containerId) { Assert.fail("Cannot find RMContainer"); } } + + @Test(timeout = 3000000) + public void testResourceProfiles() throws Exception { + + MockRM rm = new MockRM(conf); + rm.start(); + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB); + RMApp app1 = rm.submitApp(2048); + nm1.nodeHeartbeat(true); + RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); + MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); + RegisterApplicationMasterResponse resp = am1.registerAppAttempt(); + Assert.assertEquals(0, resp.getResourceProfiles().size()); + rm.stop(); + conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true); + 
conf.set(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE, + "profiles/sample-profiles-1.json"); + rm = new MockRM(conf); + rm.start(); + nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB); + app1 = rm.submitApp(2048); + nm1.nodeHeartbeat(true); + attempt1 = app1.getCurrentAppAttempt(); + am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); + resp = am1.registerAppAttempt(); + Assert.assertEquals(3, resp.getResourceProfiles().size()); + Assert.assertEquals(Resource.newInstance(1024, 1), + resp.getResourceProfiles().get("minimum")); + Assert.assertEquals(Resource.newInstance(2048, 2), + resp.getResourceProfiles().get("default")); + Assert.assertEquals(Resource.newInstance(4096, 4), + resp.getResourceProfiles().get("maximum")); + rm.stop(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index ea733a4ce4a..35b3f86d015 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -56,6 +56,8 @@ import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.protocolrecords.ApplicationsRequestScope; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -116,6 +118,7 @@ import org.apache.hadoop.yarn.api.records.ReservationRequest; import org.apache.hadoop.yarn.api.records.ReservationRequests; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -1118,6 +1121,12 @@ public void handle(Event event) {} assertEquals("Incorrect number of applications for user", 3, rmService.getApplications(request).getApplicationList().size()); + rmService.setDisplayPerUserApps(true); + userSet.clear(); + assertEquals("Incorrect number of applications for user", 6, + rmService.getApplications(request).getApplicationList().size()); + rmService.setDisplayPerUserApps(false); + // Check tags request = GetApplicationsRequest.newInstance( ApplicationsRequestScope.ALL, null, null, null, null, null, null, @@ -2061,4 +2070,115 @@ protected ClientRMService createClientRMService() { rpc.stopProxy(client, conf); new File(excludeFile).delete(); } + + @Test + public void testGetResourceTypesInfoWhenResourceProfileDisabled() + throws Exception { + YarnConfiguration conf = new YarnConfiguration(); + MockRM rm = new MockRM(conf) { + protected ClientRMService createClientRMService() { + return new ClientRMService(this.rmContext, scheduler, + this.rmAppManager, 
this.applicationACLsManager, this.queueACLsManager, + this.getRMContext().getRMDelegationTokenSecretManager()); + } + }; + rm.start(); + + YarnRPC rpc = YarnRPC.create(conf); + InetSocketAddress rmAddress = rm.getClientRMService().getBindAddress(); + LOG.info("Connecting to ResourceManager at " + rmAddress); + ApplicationClientProtocol client = + (ApplicationClientProtocol) rpc + .getProxy(ApplicationClientProtocol.class, rmAddress, conf); + + // Make call + GetAllResourceTypeInfoRequest request = + GetAllResourceTypeInfoRequest.newInstance(); + GetAllResourceTypeInfoResponse response = client.getResourceTypeInfo(request); + + Assert.assertEquals(2, response.getResourceTypeInfo().size()); + + // Check memory + Assert.assertEquals(ResourceInformation.MEMORY_MB.getName(), + response.getResourceTypeInfo().get(0).getName()); + Assert.assertEquals(ResourceInformation.MEMORY_MB.getUnits(), + response.getResourceTypeInfo().get(0).getDefaultUnit()); + + // Check vcores + Assert.assertEquals(ResourceInformation.VCORES.getName(), + response.getResourceTypeInfo().get(1).getName()); + Assert.assertEquals(ResourceInformation.VCORES.getUnits(), + response.getResourceTypeInfo().get(1).getDefaultUnit()); + + rm.stop(); + rpc.stopProxy(client, conf); + } + + @Test + public void testGetApplicationsWithPerUserApps() + throws IOException, YarnException { + /* + * Submit 3 applications alternately in two queues + */ + // Basic setup + YarnScheduler yarnScheduler = mockYarnScheduler(); + RMContext rmContext = mock(RMContext.class); + mockRMContext(yarnScheduler, rmContext); + RMStateStore stateStore = mock(RMStateStore.class); + when(rmContext.getStateStore()).thenReturn(stateStore); + doReturn(mock(RMTimelineCollectorManager.class)).when(rmContext) + .getRMTimelineCollectorManager(); + + RMAppManager appManager = new RMAppManager(rmContext, yarnScheduler, null, + mock(ApplicationACLsManager.class), new Configuration()); + when(rmContext.getDispatcher().getEventHandler()) + .thenReturn(new EventHandler() { + public void handle(Event event) { + } + }); + + // Simulate Queue ACL manager which returns false always + QueueACLsManager queueAclsManager = mock(QueueACLsManager.class); + when(queueAclsManager.checkAccess(any(UserGroupInformation.class), + any(QueueACL.class), any(RMApp.class), any(String.class), + anyListOf(String.class))).thenReturn(false); + + // Simulate app ACL manager which returns false always + ApplicationACLsManager appAclsManager = mock(ApplicationACLsManager.class); + when(appAclsManager.checkAccess(eq(UserGroupInformation.getCurrentUser()), + any(ApplicationAccessType.class), any(String.class), + any(ApplicationId.class))).thenReturn(false); + ClientRMService rmService = new ClientRMService(rmContext, yarnScheduler, + appManager, appAclsManager, queueAclsManager, null); + rmService.init(new Configuration()); + + // Initialize appnames and queues + String[] queues = {QUEUE_1, QUEUE_2}; + String[] appNames = {MockApps.newAppName(), MockApps.newAppName(), + MockApps.newAppName()}; + ApplicationId[] appIds = {getApplicationId(101), getApplicationId(102), + getApplicationId(103)}; + List tags = Arrays.asList("Tag1", "Tag2", "Tag3"); + + long[] submitTimeMillis = new long[3]; + // Submit applications + for (int i = 0; i < appIds.length; i++) { + ApplicationId appId = appIds[i]; + SubmitApplicationRequest submitRequest = mockSubmitAppRequest(appId, + appNames[i], queues[i % queues.length], + new HashSet(tags.subList(0, i + 1))); + rmService.submitApplication(submitRequest); + submitTimeMillis[i] = 
System.currentTimeMillis(); + } + + // Test different cases of ClientRMService#getApplications() + GetApplicationsRequest request = GetApplicationsRequest.newInstance(); + assertEquals("Incorrect total number of apps", 6, + rmService.getApplications(request).getApplicationList().size()); + + rmService.setDisplayPerUserApps(true); + assertEquals("Incorrect number of applications for user", 0, + rmService.getApplications(request).getApplicationList().size()); + rmService.setDisplayPerUserApps(false); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMTokens.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMTokens.java index 65145a4c93d..06c1c425dd8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMTokens.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMTokens.java @@ -533,8 +533,11 @@ private static ResourceScheduler createMockScheduler(Configuration conf) { private static RMDelegationTokenSecretManager createRMDelegationTokenSecretManager(long secretKeyInterval, long tokenMaxLifetime, long tokenRenewInterval) { + ResourceManager rm = mock(ResourceManager.class); RMContext rmContext = mock(RMContext.class); when(rmContext.getStateStore()).thenReturn(new NullRMStateStore()); + when(rm.getRMContext()).thenReturn(rmContext); + when(rmContext.getResourceManager()).thenReturn(rm); RMDelegationTokenSecretManager rmDtSecretManager = new RMDelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java index ba9de6c8d36..9ed4978bdd6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.commons.lang.time.DateUtils; import org.apache.hadoop.security.UserGroupInformation; @@ -32,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; @@ -422,6 +424,9 @@ private AggregateAppResourceUsage calculateContainerResourceMetrics( * usedMillis / DateUtils.MILLIS_PER_SECOND; long vcoreSeconds = resource.getVirtualCores() * usedMillis / 
DateUtils.MILLIS_PER_SECOND; - return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds); + Map map = new HashMap<>(); + map.put(ResourceInformation.MEMORY_MB.getName(), memorySeconds); + map.put(ResourceInformation.VCORES.getName(), vcoreSeconds); + return new AggregateAppResourceUsage(map); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestNodeBlacklistingOnAMFailures.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestNodeBlacklistingOnAMFailures.java index 526621004cb..e7d666ad8ad 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestNodeBlacklistingOnAMFailures.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestNodeBlacklistingOnAMFailures.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; @@ -41,11 +42,14 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; /** @@ -54,6 +58,12 @@ */ public class TestNodeBlacklistingOnAMFailures { + @Before + public void setup() { + QueueMetrics.clearQueueMetrics(); + DefaultMetricsSystem.setMiniClusterMode(true); + } + @Test(timeout = 100000) public void testNodeBlacklistingOnAMFailure() throws Exception { @@ -361,6 +371,7 @@ public void testNoBlacklistingForNonSystemErrors() throws Exception { // Now the AM container should be allocated RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, rm); node.nodeHeartbeat(true); + ((AbstractYarnScheduler)rm.getResourceScheduler()).update(); rm.drainEvents(); MockRM.waitForState(attempt, RMAppAttemptState.ALLOCATED, 20000); rm.sendAMLaunched(attempt.getAppAttemptId()); @@ -388,6 +399,7 @@ public void testNoBlacklistingForNonSystemErrors() throws Exception { .println("New AppAttempt launched " + attempt.getAppAttemptId()); node.nodeHeartbeat(true); + ((AbstractYarnScheduler)rm.getResourceScheduler()).update(); rm.drainEvents(); MockRM.waitForState(attempt, RMAppAttemptState.ALLOCATED, 20000); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java index 39313d06bd4..f912f68e910 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java @@ -21,11 +21,13 @@ import com.google.common.base.Supplier; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.event.DrainDispatcher; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration; import org.junit.Before; import static org.mockito.Matchers.argThat; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.spy; +import java.io.IOException; import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; @@ -89,6 +91,10 @@ public class TestRM extends ParameterizedSchedulerTestBase { private YarnConfiguration conf; + public TestRM(SchedulerType type) throws IOException { + super(type); + } + @Before public void setup() { conf = getConf(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java index 512c14a84df..588f16deefc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java @@ -41,6 +41,7 @@ import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.GroupMappingServiceProvider; import org.apache.hadoop.security.Groups; @@ -74,6 +75,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; import static org.apache.hadoop.yarn.conf.YarnConfiguration.RM_PROXY_USER_PREFIX; @@ -109,6 +111,9 @@ public class TestRMAdminService { @Before public void setup() throws IOException { + QueueMetrics.clearQueueMetrics(); + DefaultMetricsSystem.setMiniClusterMode(true); + configuration = new YarnConfiguration(); configuration.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getCanonicalName()); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index 5cbcdbcb80d..0346f4fbaab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -77,6 +77,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; @@ -146,6 +147,10 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { private static InetSocketAddress rmAddr; private List rms = new ArrayList(); + public TestRMRestart(SchedulerType type) throws IOException { + super(type); + } + @Before public void setup() throws IOException { conf = getConf(); @@ -383,6 +388,7 @@ public void testRMRestart() throws Exception { // assert app1 attempt is saved attempt1 = loadedApp1.getCurrentAppAttempt(); attemptId1 = attempt1.getAppAttemptId(); + ((AbstractYarnScheduler)rm2.getResourceScheduler()).update(); rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED); appState = rmAppState.get(loadedApp1.getApplicationId()); attemptState = appState.getAttempt(attemptId1); @@ -2097,7 +2103,8 @@ public static NMContainerStatus createNMContainerStatus( NMContainerStatus containerReport = NMContainerStatus.newInstance(containerId, 0, containerState, Resource.newInstance(1024, 1), "recover container", 0, - Priority.newInstance(0), 0, nodeLabelExpression); + Priority.newInstance(0), 0, nodeLabelExpression, + ExecutionType.GUARANTEED); return containerReport; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 5ed327868c6..41078d092cf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -29,8 +29,11 @@ import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -44,15 +47,19 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import 
org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventDispatcher; import org.apache.hadoop.yarn.event.EventHandler; @@ -74,10 +81,14 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.timelineservice.collector.PerNodeTimelineCollectorsAuxService; import org.apache.hadoop.yarn.server.timelineservice.storage.FileSystemTimelineWriterImpl; @@ -2026,6 +2037,103 @@ public void tearDown() { } } + @SuppressWarnings("unchecked") + @Test + public void testHandleOpportunisticContainerStatus() throws Exception{ + final DrainDispatcher dispatcher = new DrainDispatcher(); + YarnConfiguration conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true); + conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, + true); + rm = new MockRM(conf){ + @Override + protected Dispatcher createDispatcher() { + return dispatcher; + } + }; + + rm.start(); + RMApp app = rm.submitApp(1024, true); + ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() + .getAppAttemptId(); + + ResourceTrackerService resourceTrackerService = + rm.getResourceTrackerService(); + SchedulerApplicationAttempt applicationAttempt = null; + while (applicationAttempt == null) { + applicationAttempt = + ((AbstractYarnScheduler)rm.getRMContext().getScheduler()) + .getApplicationAttempt(appAttemptId); + Thread.sleep(100); + } + + Resource currentConsumption = applicationAttempt.getCurrentConsumption(); + Assert.assertEquals(Resource.newInstance(0, 0), currentConsumption); + Resource allocResources = + applicationAttempt.getQueue().getMetrics().getAllocatedResources(); + Assert.assertEquals(Resource.newInstance(0, 0), allocResources); + + RegisterNodeManagerRequest req = Records.newRecord( + RegisterNodeManagerRequest.class); + NodeId nodeId = NodeId.newInstance("host2", 1234); + Resource capability = BuilderUtils.newResource(1024, 1); + 
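// The RegisterNodeManagerRequest assembled here reports three recovered containers for the same
// attempt: c1 (SCHEDULED, OPPORTUNISTIC), c2 (RUNNING, OPPORTUNISTIC) and c3 (RUNNING,
// GUARANTEED). The test then expects all three to come back as live containers with their
// ExecutionType preserved, while only the GUARANTEED container (2048 MB, 1 vcore) is charged
// against the attempt's current consumption, the queue's allocated resources and the scheduler
// node's allocated resource.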
req.setResource(capability); + req.setNodeId(nodeId); + req.setHttpPort(1234); + req.setNMVersion(YarnVersionInfo.getVersion()); + ContainerId c1 = ContainerId.newContainerId(appAttemptId, 1); + ContainerId c2 = ContainerId.newContainerId(appAttemptId, 2); + ContainerId c3 = ContainerId.newContainerId(appAttemptId, 3); + NMContainerStatus queuedOpp = + NMContainerStatus.newInstance(c1, 1, ContainerState.SCHEDULED, + Resource.newInstance(1024, 1), "Dummy Queued OC", + ContainerExitStatus.INVALID, Priority.newInstance(5), 1234, "", + ExecutionType.OPPORTUNISTIC); + NMContainerStatus runningOpp = + NMContainerStatus.newInstance(c2, 1, ContainerState.RUNNING, + Resource.newInstance(2048, 1), "Dummy Running OC", + ContainerExitStatus.INVALID, Priority.newInstance(6), 1234, "", + ExecutionType.OPPORTUNISTIC); + NMContainerStatus runningGuar = + NMContainerStatus.newInstance(c3, 1, ContainerState.RUNNING, + Resource.newInstance(2048, 1), "Dummy Running GC", + ContainerExitStatus.INVALID, Priority.newInstance(6), 1234, "", + ExecutionType.GUARANTEED); + req.setContainerStatuses(Arrays.asList(queuedOpp, runningOpp, runningGuar)); + // trying to register a invalid node. + RegisterNodeManagerResponse response = + resourceTrackerService.registerNodeManager(req); + dispatcher.await(); + Thread.sleep(2000); + dispatcher.await(); + Assert.assertEquals(NodeAction.NORMAL, response.getNodeAction()); + + Collection liveContainers = applicationAttempt + .getLiveContainers(); + Assert.assertEquals(3, liveContainers.size()); + Iterator iter = liveContainers.iterator(); + while (iter.hasNext()) { + RMContainer rc = iter.next(); + Assert.assertEquals( + rc.getContainerId().equals(c3) ? + ExecutionType.GUARANTEED : ExecutionType.OPPORTUNISTIC, + rc.getExecutionType()); + } + + // Should only include GUARANTEED resources + currentConsumption = applicationAttempt.getCurrentConsumption(); + Assert.assertEquals(Resource.newInstance(2048, 1), currentConsumption); + allocResources = + applicationAttempt.getQueue().getMetrics().getAllocatedResources(); + Assert.assertEquals(Resource.newInstance(2048, 1), allocResources); + + SchedulerNode schedulerNode = + rm.getRMContext().getScheduler().getSchedulerNode(nodeId); + Assert.assertNotNull(schedulerNode); + Resource nodeResources = schedulerNode.getAllocatedResource(); + Assert.assertEquals(Resource.newInstance(2048, 1), nodeResources); + } + @Test(timeout = 60000) public void testNodeHeartBeatResponseForUnknownContainerCleanUp() throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestTokenClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestTokenClientRMService.java index 78271c65482..3e1ede9cd46 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestTokenClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestTokenClientRMService.java @@ -71,8 +71,11 @@ public class TestTokenClientRMService { @BeforeClass public static void setupSecretManager() throws IOException { + ResourceManager rm = mock(ResourceManager.class); RMContext rmContext = mock(RMContext.class); when(rmContext.getStateStore()).thenReturn(new 
NullRMStateStore()); + when(rm.getRMContext()).thenReturn(rmContext); + when(rmContext.getResourceManager()).thenReturn(rm); dtsm = new RMDelegationTokenSecretManager(60000, 60000, 60000, 60000, rmContext); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java index 2c37f44e416..a13cae719d6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java @@ -107,6 +107,10 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase MockRM rm1 = null; MockRM rm2 = null; + public TestWorkPreservingRMRestart(SchedulerType type) throws IOException { + super(type); + } + @Before public void setup() throws UnknownHostException { Logger rootLogger = LogManager.getRootLogger(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index f826631a21d..399df02465e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -189,7 +190,8 @@ public Set getRanNodes() { @Override public RMAppMetrics getRMAppMetrics() { - return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0, 0, 0); + return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, new HashMap<>(), + new HashMap<>()); } @Override @@ -337,8 +339,9 @@ public Set getApplicationTags() { public ApplicationReport createAndGetApplicationReport( String clientUserName, boolean allowAccess) { ApplicationResourceUsageReport usageReport = - ApplicationResourceUsageReport.newInstance(0, 0, null, null, null, - 0, 0, 0, 0, 0, 0); + ApplicationResourceUsageReport + .newInstance(0, 0, null, null, null, new HashMap<>(), 0, 0, + new HashMap<>()); ApplicationReport report = ApplicationReport.newInstance( getApplicationId(), appAttemptId, getUser(), getQueue(), getName(), null, 0, null, null, getDiagnostics().toString(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java index 7005bca6585..2287617b215 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Collections; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -40,6 +41,7 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; import org.apache.hadoop.yarn.api.records.YarnApplicationState; @@ -506,9 +508,16 @@ private static RMApp createRMApp(ApplicationId appId) { when(app.getCurrentAppAttempt()).thenReturn(appAttempt); when(app.getFinalApplicationStatus()).thenReturn( FinalApplicationStatus.UNDEFINED); - when(app.getRMAppMetrics()).thenReturn( - new RMAppMetrics(null, 0, 0, Integer.MAX_VALUE, Long.MAX_VALUE, - Integer.MAX_VALUE, Long.MAX_VALUE)); + Map resourceMap = new HashMap<>(); + resourceMap + .put(ResourceInformation.MEMORY_MB.getName(), (long) Integer.MAX_VALUE); + resourceMap.put(ResourceInformation.VCORES.getName(), Long.MAX_VALUE); + Map preemptedMap = new HashMap<>(); + preemptedMap + .put(ResourceInformation.MEMORY_MB.getName(), (long) Integer.MAX_VALUE); + preemptedMap.put(ResourceInformation.VCORES.getName(), Long.MAX_VALUE); + when(app.getRMAppMetrics()) + .thenReturn(new RMAppMetrics(null, 0, 0, resourceMap, preemptedMap)); Set appTags = new HashSet(); appTags.add("test"); appTags.add("tags"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisherForV2.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisherForV2.java index c6bfcc71b23..68bb325a620 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisherForV2.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisherForV2.java @@ -29,6 +29,8 @@ import java.io.FileReader; import java.io.IOException; import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -46,6 +48,7 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import 
org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity; import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType; import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent; @@ -357,15 +360,20 @@ private static RMApp createRMApp(ApplicationId appId) { when(app.getDiagnostics()).thenReturn( new StringBuilder("test diagnostics info")); RMAppAttempt appAttempt = mock(RMAppAttempt.class); - when(appAttempt.getAppAttemptId()).thenReturn( - ApplicationAttemptId.newInstance(appId, 1)); + when(appAttempt.getAppAttemptId()) + .thenReturn(ApplicationAttemptId.newInstance(appId, 1)); when(app.getCurrentAppAttempt()).thenReturn(appAttempt); - when(app.getFinalApplicationStatus()).thenReturn( - FinalApplicationStatus.UNDEFINED); + when(app.getFinalApplicationStatus()) + .thenReturn(FinalApplicationStatus.UNDEFINED); + Map resourceSecondsMap = new HashMap<>(); + resourceSecondsMap + .put(ResourceInformation.MEMORY_MB.getName(), (long) Integer.MAX_VALUE); + resourceSecondsMap + .put(ResourceInformation.VCORES.getName(), Long.MAX_VALUE); when(app.getRMAppMetrics()).thenReturn( - new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, Integer.MAX_VALUE, - Long.MAX_VALUE, 0, 0)); - when(app.getApplicationTags()).thenReturn(Collections. emptySet()); + new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, resourceSecondsMap, + new HashMap<>())); + when(app.getApplicationTags()).thenReturn(Collections.emptySet()); ApplicationSubmissionContext appSubmissionContext = mock(ApplicationSubmissionContext.class); when(appSubmissionContext.getPriority()) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java index 06a16ffeffe..453d805a843 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java @@ -194,7 +194,7 @@ protected RMAppAttempt storeAttempt(RMStateStore store, when(mockAttempt.getRMAppAttemptMetrics()) .thenReturn(mockRmAppAttemptMetrics); when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()) - .thenReturn(new AggregateAppResourceUsage(0, 0)); + .thenReturn(new AggregateAppResourceUsage(new HashMap<>())); dispatcher.attemptId = attemptId; store.storeNewApplicationAttempt(mockAttempt); waitNotify(dispatcher); @@ -292,7 +292,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper, when(mockRemovedAttempt.getRMAppAttemptMetrics()) .thenReturn(mockRmAppAttemptMetrics); when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()) - .thenReturn(new AggregateAppResourceUsage(0,0)); + .thenReturn(new AggregateAppResourceUsage(new HashMap<>())); attempts.put(attemptIdRemoved, mockRemovedAttempt); store.removeApplication(mockRemovedApp); @@ -369,7 +369,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper, oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED, "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.SUCCEEDED, 100, - oldAttemptState.getFinishTime(), 0, 0, 0, 0); + 
oldAttemptState.getFinishTime(), new HashMap<>(), new HashMap<>()); store.updateApplicationAttemptState(newAttemptState); // test updating the state of an app/attempt whose initial state was not @@ -393,7 +393,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper, oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED, "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.SUCCEEDED, 111, - oldAttemptState.getFinishTime(), 0, 0, 0, 0); + oldAttemptState.getFinishTime(), new HashMap<>(), new HashMap<>()); store.updateApplicationAttemptState(dummyAttempt); // let things settle down diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java index 5ae82391bdf..e5cf95d07b1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java @@ -33,12 +33,7 @@ import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.service.Service; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; -import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.*; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; import org.apache.hadoop.yarn.conf.HAUtil; @@ -511,7 +506,7 @@ public void testFencedState() throws Exception { when(mockAttempt.getRMAppAttemptMetrics()) .thenReturn(mockRmAppAttemptMetrics); when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()) - .thenReturn(new AggregateAppResourceUsage(0,0)); + .thenReturn(new AggregateAppResourceUsage(new HashMap<>())); store.storeNewApplicationAttempt(mockAttempt); assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState()); @@ -523,7 +518,7 @@ public void testFencedState() throws Exception { store.getCredentialsFromAppAttempt(mockAttempt), startTime, RMAppAttemptState.FINISHED, "testUrl", "test", FinalApplicationStatus.SUCCEEDED, 100, - finishTime, 0, 0, 0, 0); + finishTime, new HashMap<>(), new HashMap<>()); store.updateApplicationAttemptState(newAttemptState); assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState()); @@ -751,10 +746,20 @@ private static ApplicationStateData createAppState( private static ApplicationAttemptStateData createFinishedAttempt( ApplicationAttemptId attemptId, Container container, long startTime, int amExitStatus) { + Map resourceSecondsMap = new HashMap<>(); + Map preemptedResoureSecondsMap = new HashMap<>(); + resourceSecondsMap + .put(ResourceInformation.MEMORY_MB.getName(), 0L); + resourceSecondsMap + .put(ResourceInformation.VCORES.getName(), 0L); + 
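// These state-store tests now express aggregate app usage as per-resource maps instead of
// separate memory/vcore scalars: AggregateAppResourceUsage is constructed from a
// Map<String, Long>, and ApplicationAttemptStateData.newInstance takes one map of
// resource-seconds plus one map of preempted resource-seconds, both keyed by resource names
// such as ResourceInformation.MEMORY_MB.getName() and ResourceInformation.VCORES.getName(),
// e.g. resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 0L). The RMAppMetrics
// and ApplicationResourceUsageReport changes earlier in this patch follow the same map-keyed
// pattern.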
preemptedResoureSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), + 0L); + preemptedResoureSecondsMap + .put(ResourceInformation.VCORES.getName(), 0L); return ApplicationAttemptStateData.newInstance(attemptId, container, null, startTime, RMAppAttemptState.FINISHED, "myTrackingUrl", "attemptDiagnostics", FinalApplicationStatus.SUCCEEDED, - amExitStatus, 0, 0, 0, 0, 0); + amExitStatus, 0, resourceSecondsMap, preemptedResoureSecondsMap); } private ApplicationAttemptId storeAttempt(RMStateStore store, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/BaseSharingPolicyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/BaseSharingPolicyTest.java new file mode 100644 index 00000000000..294564a2d0c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/BaseSharingPolicyTest.java @@ -0,0 +1,189 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ +package org.apache.hadoop.yarn.server.resourcemanager.reservation; + +import static junit.framework.TestCase.fail; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.yarn.api.records.ReservationDefinition; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException; +import org.apache.hadoop.yarn.server.resourcemanager.reservation.planning.ReservationAgent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; +import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.junit.Before; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import net.jcip.annotations.NotThreadSafe; + +/** + * This class is a base test for {@code SharingPolicy} implementors. 
+ */ +@RunWith(value = Parameterized.class) +@NotThreadSafe +@SuppressWarnings("VisibilityModifier") +public abstract class BaseSharingPolicyTest { + + @Parameterized.Parameter(value = 0) + public long duration; + + @Parameterized.Parameter(value = 1) + public double height; + + @Parameterized.Parameter(value = 2) + public int numSubmissions; + + @Parameterized.Parameter(value = 3) + public String recurrenceExpression; + + @Parameterized.Parameter(value = 4) + public Class expectedError; + + private long step; + private long initTime; + + private InMemoryPlan plan; + private ReservationAgent mAgent; + private Resource minAlloc; + private ResourceCalculator res; + private Resource maxAlloc; + + private int totCont = 1000; + + protected ReservationSchedulerConfiguration conf; + + @Before + public void setup() { + // 1 sec step + step = 1000L; + initTime = System.currentTimeMillis(); + + minAlloc = Resource.newInstance(1024, 1); + res = new DefaultResourceCalculator(); + maxAlloc = Resource.newInstance(1024 * 8, 8); + + mAgent = mock(ReservationAgent.class); + + QueueMetrics rootQueueMetrics = mock(QueueMetrics.class); + Resource clusterResource = + ReservationSystemTestUtil.calculateClusterResource(totCont); + + // invoke implementors initialization of policy + SharingPolicy policy = getInitializedPolicy(); + + RMContext context = ReservationSystemTestUtil.createMockRMContext(); + + plan = new InMemoryPlan(rootQueueMetrics, policy, mAgent, clusterResource, + step, res, minAlloc, maxAlloc, "dedicated", null, true, context); + } + + public void runTest() throws IOException, PlanningException { + + long period = 1; + if (recurrenceExpression != null) { + period = Long.parseLong(recurrenceExpression); + } + + try { + RLESparseResourceAllocation rle = generateRLEAlloc(period); + + // Generate the intervalMap (trimming out-of-period entries) + Map reservationIntervalResourceMap; + if (period > 1) { + rle = new PeriodicRLESparseResourceAllocation(rle, period); + reservationIntervalResourceMap = + ReservationSystemTestUtil.toAllocation(rle, 0, period); + } else { + reservationIntervalResourceMap = ReservationSystemTestUtil + .toAllocation(rle, Long.MIN_VALUE, Long.MAX_VALUE); + } + + ReservationDefinition rDef = + ReservationSystemTestUtil.createSimpleReservationDefinition( + initTime % period, initTime % period + duration + 1, duration, 1, + recurrenceExpression); + + // perform multiple submissions where required + for (int i = 0; i < numSubmissions; i++) { + + long rstart = rle.getEarliestStartTime(); + long rend = rle.getLatestNonNullTime(); + + InMemoryReservationAllocation resAlloc = + new InMemoryReservationAllocation( + ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", + "dedicated", rstart, rend, reservationIntervalResourceMap, res, + minAlloc); + + assertTrue(plan.toString(), plan.addReservation(resAlloc, false)); + } + // fail if error was expected + if (expectedError != null) { + System.out.println(plan.toString()); + fail(); + } + } catch (Exception e) { + if (expectedError == null || !e.getClass().getCanonicalName() + .equals(expectedError.getCanonicalName())) { + // fail on unexpected errors + throw e; + } + } + } + + private RLESparseResourceAllocation generateRLEAlloc(long period) { + RLESparseResourceAllocation rle = + new RLESparseResourceAllocation(new DefaultResourceCalculator()); + + Resource alloc = Resources.multiply(minAlloc, height * totCont); + + // loop in case the periodicity of the reservation is smaller than LCM + long rStart = initTime % period; + long rEnd 
= initTime % period + duration; + + + // handle wrap-around + if (period > 1 && rEnd > period) { + long diff = rEnd - period; + rEnd = period; + + // handle multiple wrap-arounds (e.g., 5h duration on a 2h periodicity) + if(duration > period) { + rle.addInterval(new ReservationInterval(0, period), + Resources.multiply(alloc, duration / period - 1)); + rle.addInterval(new ReservationInterval(0, diff % period), alloc); + } else { + rle.addInterval(new ReservationInterval(0, diff), alloc); + } + } + + + rle.addInterval(new ReservationInterval(rStart, rEnd), alloc); + return rle; + } + + public abstract SharingPolicy getInitializedPolicy(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java index 5337e061be9..eef86a44990 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/ReservationSystemTestUtil.java @@ -1,4 +1,4 @@ -/******************************************************************************* +/****************************************************************************** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -14,7 +14,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- *******************************************************************************/ + *****************************************************************************/ package org.apache.hadoop.yarn.server.resourcemanager.reservation; import static org.mockito.Matchers.any; @@ -466,4 +466,28 @@ public static RLESparseResourceAllocation generateRLESparseResourceAllocation( public static Resource calculateClusterResource(int numContainers) { return Resource.newInstance(numContainers * 1024, numContainers); } + + + public static Map toAllocation( + RLESparseResourceAllocation rle, long start, long end) { + Map resAlloc = new TreeMap<>(); + + for (Map.Entry e : rle.getCumulative().entrySet()) { + Long nextKey = rle.getCumulative().higherKey(e.getKey()); + if (nextKey == null) { + break; + } else { + if (e.getKey() >= start && e.getKey() <= end && nextKey >= start + && nextKey <= end) { + resAlloc.put(new ReservationInterval(e.getKey(), nextKey), + e.getValue()); + } + } + } + + return resAlloc; + } + + + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacityOverTimePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacityOverTimePolicy.java index 2dee60c2f65..d054d3a7e41 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacityOverTimePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestCapacityOverTimePolicy.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
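// Illustrative sketch only (not part of this patch): the rewritten policy tests below plug into
// the new BaseSharingPolicyTest added earlier. A concrete test supplies a parameter table and an
// initialized SharingPolicy; runTest() builds the in-memory plan and submits the reservations.
// The class name TestMinimalNoOverCommit is hypothetical; the API calls mirror the
// TestNoOverCommitPolicy rewrite that follows in this patch.
package org.apache.hadoop.yarn.server.resourcemanager.reservation;

import java.util.Arrays;
import java.util.Collection;

import org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.ResourceOverCommitException;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parameterized.class)
public class TestMinimalNoOverCommit extends BaseSharingPolicyTest {

  private static final long ONEHOUR = 3600 * 1000;

  // Each row: duration, height (fraction of the cluster), number of submissions,
  // recurrence expression (null = non-periodic), expected exception (null = accepted).
  @Parameterized.Parameters(name = "Duration {0}, height {1}, submissions {2}, periodic {3}")
  public static Collection<Object[]> data() {
    return Arrays.asList(new Object[][] {
        {ONEHOUR, 0.5, 1, null, null},
        {ONEHOUR, 1.1, 1, null, ResourceOverCommitException.class}});
  }

  @Override
  public SharingPolicy getInitializedPolicy() {
    // NoOverCommitPolicy needs no quota settings, so a plain scheduler configuration suffices.
    conf = new CapacitySchedulerConfiguration();
    SharingPolicy policy = new NoOverCommitPolicy();
    policy.init(ReservationSystemTestUtil.getFullReservationQueueName(), conf);
    return policy;
  }

  @Test
  public void testAllocation() throws Exception {
    // Delegates to BaseSharingPolicyTest, which accepts or rejects each parameterized submission.
    runTest();
  }
}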
@@ -17,269 +17,118 @@ *******************************************************************************/ package org.apache.hadoop.yarn.server.resourcemanager.reservation; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - import java.io.IOException; -import java.util.Map; -import java.util.TreeMap; +import java.util.Arrays; +import java.util.Collection; -import org.apache.hadoop.yarn.api.records.ReservationDefinition; -import org.apache.hadoop.yarn.api.records.ReservationRequest; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import net.jcip.annotations.NotThreadSafe; import org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException; import org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningQuotaException; -import org.apache.hadoop.yarn.server.resourcemanager.reservation.planning.ReservationAgent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; -import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; -import org.apache.hadoop.yarn.util.resource.ResourceCalculator; -import org.junit.Assert; -import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; -public class TestCapacityOverTimePolicy { +/** + * This class tests the {@code CapacityOvertimePolicy} sharing policy. + */ +@RunWith(value = Parameterized.class) +@NotThreadSafe +@SuppressWarnings("VisibilityModifier") +public class TestCapacityOverTimePolicy extends BaseSharingPolicyTest { - long timeWindow; - long step; - float avgConstraint; - float instConstraint; - long initTime; + final static long ONEDAY = 86400 * 1000; + final static long ONEHOUR = 3600 * 1000; + final static long ONEMINUTE = 60 * 1000; + final static String TWODAYPERIOD = "7200000"; + final static String ONEDAYPERIOD = "86400000"; - InMemoryPlan plan; - ReservationAgent mAgent; - Resource minAlloc; - ResourceCalculator res; - Resource maxAlloc; + @Parameterized.Parameters(name = "Duration {0}, height {1}," + + " submission {2}, periodic {3})") + public static Collection data() { + return Arrays.asList(new Object[][] { - int totCont = 1000000; + // easy fit + {ONEHOUR, 0.25, 1, null, null }, + {ONEHOUR, 0.25, 1, TWODAYPERIOD, null }, + {ONEHOUR, 0.25, 1, ONEDAYPERIOD, null }, - @Before - public void setup() throws Exception { + // instantaneous high, but fit integral and inst limits + {ONEMINUTE, 0.74, 1, null, null }, + {ONEMINUTE, 0.74, 1, TWODAYPERIOD, null }, + {ONEMINUTE, 0.74, 1, ONEDAYPERIOD, null }, + + // barely fit + {ONEHOUR, 0.76, 1, null, PlanningQuotaException.class }, + {ONEHOUR, 0.76, 1, TWODAYPERIOD, PlanningQuotaException.class }, + {ONEHOUR, 0.76, 1, ONEDAYPERIOD, PlanningQuotaException.class }, + + // overcommit with single reservation + {ONEHOUR, 1.1, 1, null, PlanningQuotaException.class }, + {ONEHOUR, 1.1, 1, TWODAYPERIOD, PlanningQuotaException.class }, + {ONEHOUR, 1.1, 1, ONEDAYPERIOD, PlanningQuotaException.class }, + + // barely fit with multiple reservations (instantaneously, lowering to + // 1min to fit integral) + {ONEMINUTE, 0.25, 3, null, null }, + {ONEMINUTE, 0.25, 3, TWODAYPERIOD, null }, + {ONEMINUTE, 0.25, 3, ONEDAYPERIOD, null }, + + // overcommit with multiple reservations (instantaneously) + {ONEMINUTE, 0.25, 4, null, PlanningQuotaException.class }, + {ONEMINUTE, 0.25, 4, TWODAYPERIOD, PlanningQuotaException.class }, + {ONEMINUTE, 0.25, 4, ONEDAYPERIOD, 
PlanningQuotaException.class }, + + // (non-periodic) reservation longer than window + {25 * ONEHOUR, 0.25, 1, null, PlanningQuotaException.class }, + {25 * ONEHOUR, 0.25, 1, TWODAYPERIOD, PlanningQuotaException.class }, + {25 * ONEHOUR, 0.25, 1, ONEDAYPERIOD, PlanningQuotaException.class }, + + // (non-periodic) reservation longer than window + {25 * ONEHOUR, 0.05, 5, null, PlanningQuotaException.class }, + {25 * ONEHOUR, 0.05, 5, TWODAYPERIOD, PlanningQuotaException.class }, + {25 * ONEHOUR, 0.05, 5, ONEDAYPERIOD, PlanningQuotaException.class }, + + // overcommit integral + {ONEDAY, 0.26, 1, null, PlanningQuotaException.class }, + {2 * ONEHOUR, 0.26, 1, TWODAYPERIOD, PlanningQuotaException.class }, + {2 * ONEDAY, 0.26, 1, ONEDAYPERIOD, PlanningQuotaException.class }, + + // overcommit integral + {ONEDAY / 2, 0.51, 1, null, PlanningQuotaException.class }, + {2 * ONEHOUR / 2, 0.51, 1, TWODAYPERIOD, + PlanningQuotaException.class }, + {2 * ONEDAY / 2, 0.51, 1, ONEDAYPERIOD, PlanningQuotaException.class } + + }); + } + + @Override + public SharingPolicy getInitializedPolicy() { // 24h window - timeWindow = 86400000L; + long timeWindow = 86400000L; + // 1 sec step - step = 1000L; + long step = 1000L; // 25% avg cap on capacity - avgConstraint = 25; + float avgConstraint = 25; // 70% instantaneous cap on capacity - instConstraint = 70; + float instConstraint = 75; - initTime = System.currentTimeMillis(); - minAlloc = Resource.newInstance(1024, 1); - res = new DefaultResourceCalculator(); - maxAlloc = Resource.newInstance(1024 * 8, 8); - - mAgent = mock(ReservationAgent.class); - QueueMetrics rootQueueMetrics = mock(QueueMetrics.class); String reservationQ = ReservationSystemTestUtil.getFullReservationQueueName(); - Resource clusterResource = - ReservationSystemTestUtil.calculateClusterResource(totCont); - ReservationSchedulerConfiguration conf = - ReservationSystemTestUtil.createConf(reservationQ, timeWindow, - instConstraint, avgConstraint); + conf = ReservationSystemTestUtil.createConf(reservationQ, timeWindow, + instConstraint, avgConstraint); CapacityOverTimePolicy policy = new CapacityOverTimePolicy(); policy.init(reservationQ, conf); - RMContext context = ReservationSystemTestUtil.createMockRMContext(); - - plan = - new InMemoryPlan(rootQueueMetrics, policy, mAgent, - clusterResource, step, res, minAlloc, maxAlloc, - "dedicated", null, true, context); - } - - public int[] generateData(int length, int val) { - int[] data = new int[length]; - for (int i = 0; i < length; i++) { - data[i] = val; - } - return data; + return policy; } @Test - public void testSimplePass() throws IOException, PlanningException { - // generate allocation that simply fit within all constraints - int[] f = generateData(3600, (int) Math.ceil(0.2 * totCont)); - - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); + public void testAllocation() throws IOException, PlanningException { + runTest(); } - @Test(expected = PlanningException.class) - public void testAllocationLargerThanValidWindow() throws IOException, - PlanningException { - // generate allocation that exceed the validWindow - int[] f = generateData(25*3600, (int) Math.ceil(0.69 * 
totCont)); - - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - } - - @Test - public void testSimplePass2() throws IOException, PlanningException { - // generate allocation from single tenant that exceed avg momentarily but - // fit within - // max instantanesou - int[] f = generateData(3600, (int) Math.ceil(0.69 * totCont)); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - } - - @Test - public void testMultiTenantPass() throws IOException, PlanningException { - // generate allocation from multiple tenants that barely fit in tot capacity - int[] f = generateData(3600, (int) Math.ceil(0.25 * totCont)); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - for (int i = 0; i < 4; i++) { - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u" + i, - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - } - } - - @Test(expected = PlanningQuotaException.class) - public void testMultiTenantFail() throws IOException, PlanningException { - // generate allocation from multiple tenants that exceed tot capacity - int[] f = generateData(3600, (int) Math.ceil(0.25 * totCont)); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - for (int i = 0; i < 5; i++) { - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u" + i, - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - } - } - - @Test(expected = PlanningQuotaException.class) - public void testInstFail() throws IOException, PlanningException { - // generate allocation that exceed the instantaneous cap single-show - int[] f = generateData(3600, (int) Math.ceil(0.71 * totCont)); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - Assert.fail("should not have accepted this"); - } - - @Test - public void testInstFailBySum() throws IOException, PlanningException { - // generate allocation that exceed the instantaneous cap by sum - int[] f = generateData(3600, (int) Math.ceil(0.3 * totCont)); - 
ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - try { - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - Assert.fail(); - } catch (PlanningQuotaException p) { - // expected - } - } - - @Test(expected = PlanningQuotaException.class) - public void testFailAvg() throws IOException, PlanningException { - // generate an allocation which violates the 25% average single-shot - Map req = - new TreeMap(); - long win = timeWindow / 2 + 100; - int cont = (int) Math.ceil(0.5 * totCont); - req.put(new ReservationInterval(initTime, initTime + win), - ReservationSystemUtil.toResource( - ReservationRequest.newInstance(Resource.newInstance(1024, 1), - cont))); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + win, win); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + win, req, res, minAlloc), false)); - } - - @Test - public void testFailAvgBySum() throws IOException, PlanningException { - // generate an allocation which violates the 25% average by sum - Map req = - new TreeMap(); - long win = 86400000 / 4 + 1; - int cont = (int) Math.ceil(0.5 * totCont); - req.put(new ReservationInterval(initTime, initTime + win), - ReservationSystemUtil.toResource(ReservationRequest.newInstance(Resource - .newInstance(1024, 1), cont))); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + win, win); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + win, req, res, minAlloc), false)); - - try { - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), null, "u1", - "dedicated", initTime, initTime + win, req, res, minAlloc), false)); - - Assert.fail("should not have accepted this"); - } catch (PlanningQuotaException e) { - // expected - } - } - -} +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestFairSchedulerPlanFollower.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestFairSchedulerPlanFollower.java index 9561234d633..f0f2b35f3d0 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestFairSchedulerPlanFollower.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestFairSchedulerPlanFollower.java @@ -38,7 +38,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException; import org.apache.hadoop.yarn.server.resourcemanager.reservation.planning.ReservationAgent; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; @@ -137,7 +136,7 @@ protected void checkDefaultQueueBeforePlanFollowerRun() { } @Override protected void verifyCapacity(Queue defQ) { - assertTrue(((FSQueue) defQ).getWeights().getWeight(ResourceType.MEMORY) > 0.9); + assertTrue(((FSQueue) defQ).getWeight() > 0.9); } @Override @@ -173,8 +172,7 @@ protected void assertReservationQueueExists(ReservationId r, false); assertNotNull(q); // For now we are setting both to same weight - Assert.assertEquals(expectedCapacity, - q.getWeights().getWeight(ResourceType.MEMORY), 0.01); + Assert.assertEquals(expectedCapacity, q.getWeight(), 0.01); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestNoOverCommitPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestNoOverCommitPolicy.java index c5edaf000e8..accdf24a02a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestNoOverCommitPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestNoOverCommitPolicy.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,145 +17,70 @@ *******************************************************************************/ package org.apache.hadoop.yarn.server.resourcemanager.reservation; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; -import org.apache.hadoop.yarn.api.records.ReservationDefinition; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import net.jcip.annotations.NotThreadSafe; import org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException; import org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.ResourceOverCommitException; -import org.apache.hadoop.yarn.server.resourcemanager.reservation.planning.ReservationAgent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; -import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; -import org.apache.hadoop.yarn.util.resource.ResourceCalculator; -import org.junit.Before; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; -public class TestNoOverCommitPolicy { +/** + * This clas tests {@code NoOverCommitPolicy} sharing policy. + */ +@RunWith(value = Parameterized.class) +@NotThreadSafe +@SuppressWarnings("VisibilityModifier") +public class TestNoOverCommitPolicy extends BaseSharingPolicyTest { - long step; - long initTime; + final static long ONEHOUR = 3600 * 1000; + final static String TWOHOURPERIOD = "7200000"; - InMemoryPlan plan; - ReservationAgent mAgent; - Resource minAlloc; - ResourceCalculator res; - Resource maxAlloc; + @Parameterized.Parameters(name = "Duration {0}, height {1}," + + " submissions {2}, periodic {3})") + public static Collection data() { + return Arrays.asList(new Object[][] { - int totCont = 1000000; + // easy fit + {ONEHOUR, 0.25, 1, null, null }, + {ONEHOUR, 0.25, 1, TWOHOURPERIOD, null }, - @Before - public void setup() throws Exception { + // barely fit + {ONEHOUR, 1, 1, null, null }, + {ONEHOUR, 1, 1, TWOHOURPERIOD, null }, - // 1 sec step - step = 1000L; + // overcommit with single reservation + {ONEHOUR, 1.1, 1, null, ResourceOverCommitException.class }, + {ONEHOUR, 1.1, 1, TWOHOURPERIOD, ResourceOverCommitException.class }, - initTime = System.currentTimeMillis(); - minAlloc = Resource.newInstance(1024, 1); - res = new DefaultResourceCalculator(); - maxAlloc = Resource.newInstance(1024 * 8, 8); + // barely fit with multiple reservations + {ONEHOUR, 0.25, 4, null, null }, + {ONEHOUR, 0.25, 4, TWOHOURPERIOD, null }, - mAgent = mock(ReservationAgent.class); + // overcommit with multiple reservations + {ONEHOUR, 0.25, 5, null, ResourceOverCommitException.class }, + {ONEHOUR, 0.25, 5, TWOHOURPERIOD, ResourceOverCommitException.class } + + }); + } + + @Override + public SharingPolicy getInitializedPolicy() { String reservationQ = ReservationSystemTestUtil.getFullReservationQueueName(); - QueueMetrics rootQueueMetrics = mock(QueueMetrics.class); - Resource clusterResource = - ReservationSystemTestUtil.calculateClusterResource(totCont); - ReservationSchedulerConfiguration conf = mock - (ReservationSchedulerConfiguration.class); - NoOverCommitPolicy policy = new NoOverCommitPolicy(); + conf = new CapacitySchedulerConfiguration(); + SharingPolicy policy = new NoOverCommitPolicy(); 
policy.init(reservationQ, conf); - RMContext context = ReservationSystemTestUtil.createMockRMContext(); - - plan = - new InMemoryPlan(rootQueueMetrics, policy, mAgent, - clusterResource, step, res, minAlloc, maxAlloc, - "dedicated", null, true, context); - } - - public int[] generateData(int length, int val) { - int[] data = new int[length]; - for (int i = 0; i < length; i++) { - data[i] = val; - } - return data; + return policy; } @Test - public void testSingleUserEasyFitPass() throws IOException, PlanningException { - // generate allocation that easily fit within resource constraints - int[] f = generateData(3600, (int) Math.ceil(0.2 * totCont)); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); + public void testAllocation() throws IOException, PlanningException { + runTest(); } - @Test - public void testSingleUserBarelyFitPass() throws IOException, - PlanningException { - // generate allocation from single tenant that barely fit - int[] f = generateData(3600, totCont); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u1", - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - } - - @Test(expected = ResourceOverCommitException.class) - public void testSingleFail() throws IOException, PlanningException { - // generate allocation from single tenant that exceed capacity - int[] f = generateData(3600, (int) (1.1 * totCont)); - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), null, "u1", - "dedicated", initTime, initTime + f.length, ReservationSystemTestUtil - .generateAllocation(initTime, step, f), res, minAlloc), false); - } - - @Test - public void testMultiTenantPass() throws IOException, PlanningException { - // generate allocation from multiple tenants that barely fit in tot capacity - int[] f = generateData(3600, (int) Math.ceil(0.25 * totCont)); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - for (int i = 0; i < 4; i++) { - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - ReservationSystemTestUtil.getNewReservationId(), rDef, "u" + i, - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - } - } - - @Test(expected = ResourceOverCommitException.class) - public void testMultiTenantFail() throws IOException, PlanningException { - // generate allocation from multiple tenants that exceed tot capacity - int[] f = generateData(3600, (int) Math.ceil(0.25 * totCont)); - ReservationDefinition rDef = - ReservationSystemTestUtil.createSimpleReservationDefinition( - initTime, initTime + f.length + 1, f.length); - for (int i = 0; i < 5; i++) { - assertTrue(plan.toString(), - plan.addReservation(new InMemoryReservationAllocation( - 
ReservationSystemTestUtil.getNewReservationId(), rDef, "u" + i, - "dedicated", initTime, initTime + f.length, - ReservationSystemTestUtil.generateAllocation(initTime, step, f), - res, minAlloc), false)); - } - } -} +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestReservationSystem.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestReservationSystem.java index a7b7e32ff9c..6c4fac8e6cf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestReservationSystem.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/TestReservationSystem.java @@ -53,6 +53,10 @@ public class TestReservationSystem extends private Configuration conf; private RMContext mockRMContext; + public TestReservationSystem(SchedulerType type) throws IOException { + super(type); + } + @Before public void setUp() throws IOException { scheduler = initializeScheduler(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceProfiles.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceProfiles.java new file mode 100644 index 00000000000..291f103c8a7 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceProfiles.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.resource; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Common test class for resource profile related tests. 
+ */
+public class TestResourceProfiles {
+  public static final String TEST_CONF_RESET_RESOURCE_TYPES =
+      "yarn.test.reset-resource-types";
+
+  @Test
+  public void testProfilesEnabled() throws Exception {
+    ResourceProfilesManager manager = new ResourceProfilesManagerImpl();
+    Configuration conf = new Configuration();
+    // by default resource profiles should not be enabled
+    manager.init(conf);
+    try {
+      manager.getResourceProfiles();
+      Assert
+          .fail("Exception should be thrown as resource profile is not enabled"
+              + " and getResourceProfiles is invoked.");
+    } catch (YarnException ie) {
+    }
+    conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true);
+    try {
+      manager.init(conf);
+      Assert.fail(
+          "Exception should be thrown due to missing resource profiles file");
+    } catch (IOException ie) {
+    }
+    conf.set(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE,
+        "profiles/sample-profiles-1.json");
+    manager.init(conf);
+  }
+
+  @Test
+  public void testLoadProfiles() throws Exception {
+    ResourceProfilesManager manager = new ResourceProfilesManagerImpl();
+    Configuration conf = new Configuration();
+    conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true);
+    conf.set(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE,
+        "profiles/sample-profiles-1.json");
+    manager.init(conf);
+    Map<String, Resource> profiles = manager.getResourceProfiles();
+    Map<String, Resource> expected = new HashMap<>();
+    expected.put("minimum", Resource.newInstance(1024, 1));
+    expected.put("default", Resource.newInstance(2048, 2));
+    expected.put("maximum", Resource.newInstance(4096, 4));
+
+    for (Map.Entry<String, Resource> entry : expected.entrySet()) {
+      String profile = entry.getKey();
+      Resource res = entry.getValue();
+      Assert.assertTrue("Mandatory profile '" + profile + "' missing",
+          profiles.containsKey(profile));
+      Assert.assertEquals("Profile '" + profile + "' resources don't match",
+          res, manager.getProfile(profile));
+    }
+  }
+
+  @Test
+  public void testLoadProfilesMissingMandatoryProfile() throws Exception {
+
+    Configuration conf = new Configuration();
+    conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true);
+
+    String[] badProfiles = {"profiles/illegal-profiles-1.json",
+        "profiles/illegal-profiles-2.json", "profiles/illegal-profiles-3.json"};
+    for (String file : badProfiles) {
+      ResourceProfilesManager manager = new ResourceProfilesManagerImpl();
+      conf.set(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE, file);
+      try {
+        manager.init(conf);
+        Assert.fail("Bad profile '" + file + "' is not valid");
+      } catch (IOException ie) {
+      }
+    }
+  }
+
+  @Test
+  public void testGetProfile() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true);
+    ResourceProfilesManager manager = new ResourceProfilesManagerImpl();
+    conf.set(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE,
+        "profiles/sample-profiles-2.json");
+    manager.init(conf);
+    Map<String, Resource> expected = new HashMap<>();
+    expected.put("minimum", Resource.newInstance(1024, 1));
+    expected.put("default", Resource.newInstance(2048, 2));
+    expected.put("maximum", Resource.newInstance(4096, 4));
+    expected.put("small", Resource.newInstance(1024, 1));
+    expected.put("medium", Resource.newInstance(2048, 1));
+    expected.put("large", Resource.newInstance(4096, 4));
+
+    for (Map.Entry<String, Resource> entry : expected.entrySet()) {
+      String profile = entry.getKey();
+      Resource res = entry.getValue();
+      Assert.assertEquals("Profile '" + profile + "' resources don't match",
+          res, manager.getProfile(profile));
+    }
+  }
+
+  @Test
+  public void testGetMandatoryProfiles() throws Exception {
+    ResourceProfilesManager manager = new ResourceProfilesManagerImpl();
+    Configuration conf = new Configuration();
+    conf.setBoolean(YarnConfiguration.RM_RESOURCE_PROFILES_ENABLED, true);
+    conf.set(YarnConfiguration.RM_RESOURCE_PROFILES_SOURCE_FILE,
+        "profiles/sample-profiles-1.json");
+    manager.init(conf);
+    Map<String, Resource> expected = new HashMap<>();
+    expected.put("minimum", Resource.newInstance(1024, 1));
+    expected.put("default", Resource.newInstance(2048, 2));
+    expected.put("maximum", Resource.newInstance(4096, 4));
+
+    Assert.assertEquals("Profile 'minimum' resources don't match",
+        expected.get("minimum"), manager.getMinimumProfile());
+    Assert.assertEquals("Profile 'default' resources don't match",
+        expected.get("default"), manager.getDefaultProfile());
+    Assert.assertEquals("Profile 'maximum' resources don't match",
+        expected.get("maximum"), manager.getMaximumProfile());
+
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceWeights.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceWeights.java
deleted file mode 100644
index f420b9ecd22..00000000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResourceWeights.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.resource;
-
-import org.junit.Assert;
-
-import org.junit.Test;
-
-public class TestResourceWeights {
-
-  @Test(timeout=3000)
-  public void testWeights() {
-    ResourceWeights rw1 = new ResourceWeights();
-    Assert.assertEquals("Default CPU weight should be 0.0f.", 0.0f,
-        rw1.getWeight(ResourceType.CPU), 0.00001f);
-    Assert.assertEquals("Default memory weight should be 0.0f", 0.0f,
-        rw1.getWeight(ResourceType.MEMORY), 0.00001f);
-
-    ResourceWeights rw2 = new ResourceWeights(2.0f);
-    Assert.assertEquals("The CPU weight should be 2.0f.", 2.0f,
-        rw2.getWeight(ResourceType.CPU), 0.00001f);
-    Assert.assertEquals("The memory weight should be 2.0f", 2.0f,
-        rw2.getWeight(ResourceType.MEMORY), 0.00001f);
-
-    // set each individually
-    ResourceWeights rw3 = new ResourceWeights(1.5f, 2.0f);
-    Assert.assertEquals("The CPU weight should be 2.0f", 2.0f,
-        rw3.getWeight(ResourceType.CPU), 0.00001f);
-    Assert.assertEquals("The memory weight should be 1.5f", 1.5f,
-        rw3.getWeight(ResourceType.MEMORY), 0.00001f);
-
-    // reset weights
-    rw3.setWeight(ResourceType.CPU, 2.5f);
-    Assert.assertEquals("The CPU weight should be set to 2.5f.", 2.5f,
-        rw3.getWeight(ResourceType.CPU), 0.00001f);
-    rw3.setWeight(ResourceType.MEMORY, 4.0f);
-    Assert.assertEquals("The memory weight should be set to 4.0f.", 4.0f,
-        rw3.getWeight(ResourceType.MEMORY), 0.00001f);
-  }
-}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java
deleted file mode 100644
index 2a10747ac9d..00000000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resource/TestResources.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ -package org.apache.hadoop.yarn.server.resourcemanager.resource; - -import static org.apache.hadoop.yarn.util.resource.Resources.*; -import static org.junit.Assert.*; -import org.junit.Test; - -public class TestResources { - @Test(timeout=10000) - public void testFitsIn() { - assertTrue(fitsIn(createResource(1, 1), createResource(2, 2))); - assertTrue(fitsIn(createResource(2, 2), createResource(2, 2))); - assertFalse(fitsIn(createResource(2, 2), createResource(1, 1))); - assertFalse(fitsIn(createResource(1, 2), createResource(2, 1))); - assertFalse(fitsIn(createResource(2, 1), createResource(1, 2))); - } - - @Test(timeout=10000) - public void testComponentwiseMin() { - assertEquals(createResource(1, 1), - componentwiseMin(createResource(1, 1), createResource(2, 2))); - assertEquals(createResource(1, 1), - componentwiseMin(createResource(2, 2), createResource(1, 1))); - assertEquals(createResource(1, 1), - componentwiseMin(createResource(1, 2), createResource(2, 1))); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMReconnect.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMReconnect.java index 6a7325c25c8..3c4e6b424de 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMReconnect.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMReconnect.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.resourcetracker; +import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -70,6 +71,10 @@ public class TestNMReconnect extends ParameterizedSchedulerTestBase { private Dispatcher dispatcher; private RMContextImpl context; + public TestNMReconnect(SchedulerType type) throws IOException { + super(type); + } + private class TestRMNodeEventDispatcher implements EventHandler { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestApplicationLifetimeMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestApplicationLifetimeMonitor.java index f7e76bb077b..a7808d7b567 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestApplicationLifetimeMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestApplicationLifetimeMonitor.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -50,6 +51,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import 
org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; import org.apache.hadoop.yarn.util.Times; import org.apache.log4j.Level; import org.apache.log4j.LogManager; @@ -67,6 +71,9 @@ public class TestApplicationLifetimeMonitor { @Before public void setup() throws IOException { conf = new YarnConfiguration(); + // Always run for CS, since other scheduler do not support this. + conf.setClass(YarnConfiguration.RM_SCHEDULER, + CapacityScheduler.class, ResourceScheduler.class); Logger rootLogger = LogManager.getRootLogger(); rootLogger.setLevel(Level.DEBUG); UserGroupInformation.setConfiguration(conf); @@ -78,8 +85,15 @@ public void setup() throws IOException { public void testApplicationLifetimeMonitor() throws Exception { MockRM rm = null; try { + long maxLifetime = 30L; + long defaultLifetime = 15L; + + YarnConfiguration newConf = + new YarnConfiguration(setUpCSQueue(maxLifetime, defaultLifetime)); + conf = new YarnConfiguration(newConf); rm = new MockRM(conf); rm.start(); + Priority appPriority = Priority.newInstance(0); MockNM nm1 = rm.registerNode("127.0.0.1:1234", 16 * 1024); @@ -92,6 +106,13 @@ public void testApplicationLifetimeMonitor() throws Exception { timeouts.put(ApplicationTimeoutType.LIFETIME, 20L); RMApp app2 = rm.submitApp(1024, appPriority, timeouts); + // user not set lifetime, so queue max lifetime will be considered. + RMApp app3 = rm.submitApp(1024, appPriority, Collections.emptyMap()); + + // asc lifetime exceeds queue max lifetime + timeouts.put(ApplicationTimeoutType.LIFETIME, 40L); + RMApp app4 = rm.submitApp(1024, appPriority, timeouts); + nm1.nodeHeartbeat(true); // Send launch Event MockAM am1 = @@ -103,8 +124,9 @@ public void testApplicationLifetimeMonitor() throws Exception { Map updateTimeout = new HashMap(); - long newLifetime = 10L; - // update 10L seconds more to timeout + long newLifetime = 40L; + // update 30L seconds more to timeout which is greater than queue max + // lifetime String formatISO8601 = Times.formatISO8601(System.currentTimeMillis() + newLifetime * 1000); updateTimeout.put(ApplicationTimeoutType.LIFETIME, formatISO8601); @@ -142,8 +164,6 @@ public void testApplicationLifetimeMonitor() throws Exception { !appTimeouts.isEmpty()); ApplicationTimeout timeout = appTimeouts.get(ApplicationTimeoutType.LIFETIME); - Assert.assertEquals("Application timeout string is incorrect.", - formatISO8601, timeout.getExpiryTime()); Assert.assertTrue("Application remaining time is incorrect", timeout.getRemainingTime() > 0); @@ -152,6 +172,17 @@ public void testApplicationLifetimeMonitor() throws Exception { Assert.assertTrue("Application killed before lifetime value", app2.getFinishTime() > afterUpdate); + rm.waitForState(app3.getApplicationId(), RMAppState.KILLED); + + // app4 submitted exceeding queue max lifetime, so killed after queue max + // lifetime. 
+ rm.waitForState(app4.getApplicationId(), RMAppState.KILLED); + long totalTimeRun = (app4.getFinishTime() - app4.getSubmitTime()) / 1000; + Assert.assertTrue("Application killed before lifetime value", + totalTimeRun > maxLifetime); + Assert.assertTrue( + "Application killed before lifetime value " + totalTimeRun, + totalTimeRun < maxLifetime + 10L); } finally { stopRM(rm); } @@ -172,7 +203,7 @@ public void testApplicationLifetimeOnRMRestart() throws Exception { nm1.registerNode(); nm1.nodeHeartbeat(true); - long appLifetime = 60L; + long appLifetime = 30L; Map timeouts = new HashMap(); timeouts.put(ApplicationTimeoutType.LIFETIME, appLifetime); @@ -305,6 +336,21 @@ public synchronized void updateApplicationStateInternal( } } + private CapacitySchedulerConfiguration setUpCSQueue(long maxLifetime, + long defaultLifetime) { + CapacitySchedulerConfiguration csConf = + new CapacitySchedulerConfiguration(); + csConf.setQueues(CapacitySchedulerConfiguration.ROOT, + new String[] {"default"}); + csConf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".default", 100); + csConf.setMaximumLifetimePerQueue( + CapacitySchedulerConfiguration.ROOT + ".default", maxLifetime); + csConf.setDefaultLifetimePerQueue( + CapacitySchedulerConfiguration.ROOT + ".default", defaultLifetime); + + return csConf; + } + private void stopRM(MockRM rm) { if (rm != null) { rm.stop(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java index 60b9e4bc95d..979e68a25d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java @@ -84,13 +84,16 @@ @SuppressWarnings("unchecked") public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase { + public TestAbstractYarnScheduler(SchedulerType type) throws IOException { + super(type); + } + @Test public void testMaximimumAllocationMemory() throws Exception { final int node1MaxMemory = 15 * 1024; final int node2MaxMemory = 5 * 1024; final int node3MaxMemory = 6 * 1024; final int configuredMaxMemory = 10 * 1024; - configureScheduler(); YarnConfiguration conf = getConf(); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, configuredMaxMemory); @@ -177,7 +180,6 @@ public void testMaximimumAllocationVCores() throws Exception { final int node2MaxVCores = 5; final int node3MaxVCores = 6; final int configuredMaxVCores = 10; - configureScheduler(); YarnConfiguration conf = getConf(); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, configuredMaxVCores); @@ -381,7 +383,6 @@ public void testMaxAllocationAfterUpdateNodeResource() throws IOException { @Test(timeout = 10000) public void testReleasedContainerIfAppAttemptisNull() throws Exception { YarnConfiguration conf=getConf(); - conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true); MockRM rm1 = new MockRM(conf); try { rm1.start(); @@ -425,7 +426,6 @@ public void testReleasedContainerIfAppAttemptisNull() throws Exception { 
@Test(timeout=60000) public void testContainerReleasedByNode() throws Exception { System.out.println("Starting testContainerReleasedByNode"); - configureScheduler(); YarnConfiguration conf = getConf(); MockRM rm1 = new MockRM(conf); try { @@ -538,7 +538,6 @@ public void testContainerReleasedByNode() throws Exception { @Test(timeout = 60000) public void testResourceRequestRestoreWhenRMContainerIsAtAllocated() throws Exception { - configureScheduler(); YarnConfiguration conf = getConf(); MockRM rm1 = new MockRM(conf); try { @@ -627,7 +626,6 @@ public void testResourceRequestRestoreWhenRMContainerIsAtAllocated() public void testResourceRequestRecoveryToTheRightAppAttempt() throws Exception { - configureScheduler(); YarnConfiguration conf = getConf(); MockRM rm = new MockRM(conf); try { @@ -798,7 +796,6 @@ private ResourceTrackerService getPrivateResourceTrackerService( */ @Test(timeout = 60000) public void testNodemanagerReconnect() throws Exception { - configureScheduler(); Configuration conf = getConf(); MockRM rm = new MockRM(conf); try { @@ -846,4 +843,35 @@ public void testNodemanagerReconnect() throws Exception { rm.stop(); } } + + @Test(timeout = 10000) + public void testUpdateThreadLifeCycle() throws Exception { + MockRM rm = new MockRM(getConf()); + try { + rm.start(); + AbstractYarnScheduler scheduler = + (AbstractYarnScheduler) rm.getResourceScheduler(); + + if (getSchedulerType().equals(SchedulerType.FAIR)) { + Thread updateThread = scheduler.updateThread; + Assert.assertTrue(updateThread.isAlive()); + scheduler.stop(); + + int numRetries = 100; + while (numRetries-- > 0 && updateThread.isAlive()) { + Thread.sleep(50); + } + + Assert.assertNotEquals("The Update thread is still alive", 0, numRetries); + } else if (getSchedulerType().equals(SchedulerType.CAPACITY)) { + Assert.assertNull("updateThread shouldn't have been created", + scheduler.updateThread); + } else { + Assert.fail("Unhandled SchedulerType, " + getSchedulerType() + + ", please update this unit test."); + } + } finally { + rm.stop(); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulingWithAllocationRequestId.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulingWithAllocationRequestId.java index e60fd6f889a..8e983321136 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulingWithAllocationRequestId.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulingWithAllocationRequestId.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; +import java.io.IOException; import java.util.List; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; @@ -30,6 +31,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.ParameterizedSchedulerTestBase; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration; import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; @@ -46,9 +48,23 @@ 
public class TestSchedulingWithAllocationRequestId LoggerFactory.getLogger(TestSchedulingWithAllocationRequestId.class); private static final int GB = 1024; - @Test + public TestSchedulingWithAllocationRequestId(SchedulerType type) throws IOException { + super(type); + } + + @Override + public YarnConfiguration getConf() { + YarnConfiguration conf = super.getConf(); + if (getSchedulerType().equals(SchedulerType.FAIR)) { + // Some tests here rely on being able to assign multiple containers with + // a single heartbeat + conf.setBoolean(FairSchedulerConfiguration.ASSIGN_MULTIPLE, true); + } + return conf; + } + + @Test (timeout = 10000) public void testMultipleAllocationRequestIds() throws Exception { - configureScheduler(); YarnConfiguration conf = getConf(); MockRM rm = new MockRM(conf); try { @@ -63,32 +79,20 @@ public void testMultipleAllocationRequestIds() throws Exception { MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); am1.registerAppAttempt(); - // add request for containers with id 10 & 20 - am1.addRequests(new String[] {"127.0.0.1" }, 2 * GB, 1, 1, 10L); - AllocateResponse allocResponse = am1.schedule(); // send the request - am1.addRequests(new String[] {"127.0.0.2" }, 2 * GB, 1, 2, 20L); - allocResponse = am1.schedule(); // send the request + // send requests for containers with id 10 & 20 + am1.allocate(am1.createReq( + new String[] {"127.0.0.1"}, 2 * GB, 1, 1, 10L), null); + am1.allocate(am1.createReq( + new String[] {"127.0.0.2"}, 2 * GB, 1, 2, 20L), null); // check if request id 10 is satisfied - nm1.nodeHeartbeat(true); - allocResponse = am1.schedule(); // send the request - while (allocResponse.getAllocatedContainers().size() < 1) { - LOG.info("Waiting for containers to be created for app 1..."); - Thread.sleep(100); - allocResponse = am1.schedule(); - } + AllocateResponse allocResponse = waitForAllocResponse(rm, am1, nm1, 1); List allocated = allocResponse.getAllocatedContainers(); Assert.assertEquals(1, allocated.size()); checkAllocatedContainer(allocated.get(0), 2 * GB, nm1.getNodeId(), 10); // check now if request id 20 is satisfied - nm2.nodeHeartbeat(true); - while (allocResponse.getAllocatedContainers().size() < 2) { - LOG.info("Waiting for containers to be created for app 1..."); - Thread.sleep(100); - allocResponse = am1.schedule(); - } - + allocResponse = waitForAllocResponse(rm, am1, nm2, 2); allocated = allocResponse.getAllocatedContainers(); Assert.assertEquals(2, allocated.size()); for (Container container : allocated) { @@ -101,9 +105,8 @@ public void testMultipleAllocationRequestIds() throws Exception { } } - @Test + @Test (timeout = 10000) public void testMultipleAllocationRequestDiffPriority() throws Exception { - configureScheduler(); YarnConfiguration conf = getConf(); MockRM rm = new MockRM(conf); try { @@ -118,20 +121,14 @@ public void testMultipleAllocationRequestDiffPriority() throws Exception { MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); am1.registerAppAttempt(); - // add request for containers with id 10 & 20 - am1.addRequests(new String[] {"127.0.0.1" }, 2 * GB, 2, 1, 10L); - AllocateResponse allocResponse = am1.schedule(); // send the request - am1.addRequests(new String[] {"127.0.0.2" }, 2 * GB, 1, 2, 20L); - allocResponse = am1.schedule(); // send the request + // send requests for containers with id 10 & 20 + am1.allocate(am1.createReq( + new String[] {"127.0.0.1"}, 2 * GB, 2, 1, 10L), null); + am1.allocate(am1.createReq( + new String[] {"127.0.0.2"}, 2 * GB, 1, 2, 20L), null); // check if request id 20 is 
satisfied first - nm2.nodeHeartbeat(true); - while (allocResponse.getAllocatedContainers().size() < 2) { - LOG.info("Waiting for containers to be created for app 1..."); - Thread.sleep(100); - allocResponse = am1.schedule(); - } - + AllocateResponse allocResponse = waitForAllocResponse(rm, am1, nm2, 2); List allocated = allocResponse.getAllocatedContainers(); Assert.assertEquals(2, allocated.size()); for (Container container : allocated) { @@ -139,13 +136,7 @@ public void testMultipleAllocationRequestDiffPriority() throws Exception { } // check now if request id 10 is satisfied - nm1.nodeHeartbeat(true); - allocResponse = am1.schedule(); // send the request - while (allocResponse.getAllocatedContainers().size() < 1) { - LOG.info("Waiting for containers to be created for app 1..."); - Thread.sleep(100); - allocResponse = am1.schedule(); - } + allocResponse = waitForAllocResponse(rm, am1, nm1, 1); allocated = allocResponse.getAllocatedContainers(); Assert.assertEquals(1, allocated.size()); checkAllocatedContainer(allocated.get(0), 2 * GB, nm1.getNodeId(), 10); @@ -164,9 +155,8 @@ private void checkAllocatedContainer(Container allocated, int memory, allocated.getAllocationRequestId()); } - @Test + @Test (timeout = 10000) public void testMultipleAppsWithAllocationReqId() throws Exception { - configureScheduler(); YarnConfiguration conf = getConf(); MockRM rm = new MockRM(conf); try { @@ -190,19 +180,11 @@ public void testMultipleAppsWithAllocationReqId() throws Exception { // Submit app1 RR with allocationReqId = 5 int numContainers = 1; - am1.addRequests(new String[] {host0, host1 }, 1 * GB, 1, numContainers, - 5L); - AllocateResponse allocResponse = am1.schedule(); - - // wait for containers to be allocated. - nm1.nodeHeartbeat(true); - allocResponse = am1.schedule(); // send the request - while (allocResponse.getAllocatedContainers().size() < 1) { - LOG.info("Waiting for containers to be created for app 1..."); - Thread.sleep(100); - allocResponse = am1.schedule(); - } + am1.allocate(am1.createReq( + new String[] {host0, host1}, 1 * GB, 1, numContainers, 5L), null); + // wait for container to be allocated. + AllocateResponse allocResponse = waitForAllocResponse(rm, am1, nm1, 1); List allocated = allocResponse.getAllocatedContainers(); Assert.assertEquals(1, allocated.size()); checkAllocatedContainer(allocated.get(0), 1 * GB, nm1.getNodeId(), 5L); @@ -212,55 +194,31 @@ public void testMultipleAppsWithAllocationReqId() throws Exception { MockAM am2 = MockRM.launchAndRegisterAM(app2, rm, nm2); // Submit app2 RR with allocationReqId = 5 - am2.addRequests(new String[] {host0, host1 }, 2 * GB, 1, numContainers, - 5L); - am2.schedule(); - - // wait for containers to be allocated. - nm2.nodeHeartbeat(true); - allocResponse = am2.schedule(); // send the request - while (allocResponse.getAllocatedContainers().size() < 1) { - LOG.info("Waiting for containers to be created for app 1..."); - Thread.sleep(100); - allocResponse = am2.schedule(); - } + am2.allocate(am1.createReq( + new String[] {host0, host1}, 2 * GB, 1, numContainers, 5L), null); + // wait for container to be allocated. + allocResponse = waitForAllocResponse(rm, am2, nm2, 1); allocated = allocResponse.getAllocatedContainers(); Assert.assertEquals(1, allocated.size()); checkAllocatedContainer(allocated.get(0), 2 * GB, nm2.getNodeId(), 5L); // Now submit app2 RR with allocationReqId = 10 - am2.addRequests(new String[] {host0, host1 }, 3 * GB, 1, numContainers, - 10L); - am2.schedule(); - - // wait for containers to be allocated. 
- nm1.nodeHeartbeat(true); - allocResponse = am2.schedule(); // send the request - while (allocResponse.getAllocatedContainers().size() < 1) { - LOG.info("Waiting for containers to be created for app 1..."); - Thread.sleep(100); - allocResponse = am2.schedule(); - } + am2.allocate(am1.createReq( + new String[] {host0, host1}, 3 * GB, 1, numContainers, 10L), null); + // wait for container to be allocated. + allocResponse = waitForAllocResponse(rm, am2, nm1, 1); allocated = allocResponse.getAllocatedContainers(); Assert.assertEquals(1, allocated.size()); checkAllocatedContainer(allocated.get(0), 3 * GB, nm1.getNodeId(), 10L); // Now submit app1 RR with allocationReqId = 10 - am1.addRequests(new String[] {host0, host1 }, 4 * GB, 1, numContainers, - 10L); - am1.schedule(); - - // wait for containers to be allocated. - nm2.nodeHeartbeat(true); - allocResponse = am1.schedule(); // send the request - while (allocResponse.getAllocatedContainers().size() < 1) { - LOG.info("Waiting for containers to be created for app 1..."); - Thread.sleep(100); - allocResponse = am1.schedule(); - } + am1.allocate(am1.createReq( + new String[] {host0, host1}, 4 * GB, 1, numContainers, 10L), null); + // wait for container to be allocated. + allocResponse = waitForAllocResponse(rm, am1, nm2, 1); allocated = allocResponse.getAllocatedContainers(); Assert.assertEquals(1, allocated.size()); checkAllocatedContainer(allocated.get(0), 4 * GB, nm2.getNodeId(), 10L); @@ -271,4 +229,17 @@ public void testMultipleAppsWithAllocationReqId() throws Exception { } } + private AllocateResponse waitForAllocResponse(MockRM rm, MockAM am, MockNM nm, + int size) throws Exception { + AllocateResponse allocResponse = am.doHeartbeat(); + while (allocResponse.getAllocatedContainers().size() < size) { + LOG.info("Waiting for containers to be created for app..."); + nm.nodeHeartbeat(true); + ((AbstractYarnScheduler) rm.getResourceScheduler()).update(); + Thread.sleep(100); + allocResponse = am.doHeartbeat(); + } + return allocResponse; + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index a52622246c3..a03993515c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -4319,143 +4319,6 @@ public void testCSReservationWithRootUnblocked() throws Exception { rm.stop(); } - @Test (timeout = 300000) - public void testUserLimitThroughput() throws Exception { - // Since this is more of a performance unit test, only run if - // RunUserLimitThroughput is set (-DRunUserLimitThroughput=true) - Assume.assumeTrue(Boolean.valueOf( - System.getProperty("RunUserLimitThroughput"))); - - CapacitySchedulerConfiguration csconf = - new CapacitySchedulerConfiguration(); - csconf.setMaximumApplicationMasterResourcePerQueuePercent("root", 100.0f); - csconf.setMaximumAMResourcePercentPerPartition("root", "", 100.0f); - 
csconf.setMaximumApplicationMasterResourcePerQueuePercent("root.default", - 100.0f); - csconf.setMaximumAMResourcePercentPerPartition("root.default", "", 100.0f); - csconf.setResourceComparator(DominantResourceCalculator.class); - - YarnConfiguration conf = new YarnConfiguration(csconf); - conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, - ResourceScheduler.class); - - MockRM rm = new MockRM(conf); - rm.start(); - - CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); - LeafQueue qb = (LeafQueue)cs.getQueue("default"); - - // For now make user limit large so we can activate all applications - qb.setUserLimitFactor((float)100.0); - qb.setupConfigurableCapacities(); - - SchedulerEvent addAppEvent; - SchedulerEvent addAttemptEvent; - Container container = mock(Container.class); - ApplicationSubmissionContext submissionContext = - mock(ApplicationSubmissionContext.class); - - final int appCount = 100; - ApplicationId[] appids = new ApplicationId[appCount]; - RMAppAttemptImpl[] attempts = new RMAppAttemptImpl[appCount]; - ApplicationAttemptId[] appAttemptIds = new ApplicationAttemptId[appCount]; - RMAppImpl[] apps = new RMAppImpl[appCount]; - RMAppAttemptMetrics[] attemptMetrics = new RMAppAttemptMetrics[appCount]; - for (int i=0; i loggers=LogManager.getCurrentLoggers(); - loggers.hasMoreElements(); ) { - Logger logger = (Logger) loggers.nextElement(); - logger.setLevel(Level.WARN); - } - final int topn = 20; - final int iterations = 2000000; - final int printInterval = 20000; - final float numerator = 1000.0f * printInterval; - PriorityQueue queue = new PriorityQueue<>(topn, - Collections.reverseOrder()); - - long n = Time.monotonicNow(); - long timespent = 0; - for (int i = 0; i < iterations; i+=2) { - if (i > 0 && i % printInterval == 0){ - long ts = (Time.monotonicNow() - n); - if (queue.size() < topn) { - queue.offer(ts); - } else { - Long last = queue.peek(); - if (last > ts) { - queue.poll(); - queue.offer(ts); - } - } - System.out.println(i + " " + (numerator / ts)); - n= Time.monotonicNow(); - } - cs.handle(new NodeUpdateSchedulerEvent(node)); - cs.handle(new NodeUpdateSchedulerEvent(node2)); - } - timespent=0; - int entries = queue.size(); - while(queue.size() > 0){ - long l = queue.poll(); - timespent += l; - } - System.out.println("Avg of fastest " + entries + ": " - + numerator / (timespent / entries)); - rm.stop(); - } - @Test public void testCSQueueBlocked() throws Exception { CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration(); @@ -4909,6 +4772,96 @@ public void testAMLimitDouble() throws Exception { rm.stop(); } + @Test(timeout = 30000) + public void testcheckAndGetApplicationLifetime() throws Exception { + long maxLifetime = 10; + long defaultLifetime = 5; + // positive integer value + CapacityScheduler cs = setUpCSQueue(maxLifetime, defaultLifetime); + Assert.assertEquals(maxLifetime, + cs.checkAndGetApplicationLifetime("default", 100)); + Assert.assertEquals(9, cs.checkAndGetApplicationLifetime("default", 9)); + Assert.assertEquals(defaultLifetime, + cs.checkAndGetApplicationLifetime("default", -1)); + Assert.assertEquals(defaultLifetime, + cs.checkAndGetApplicationLifetime("default", 0)); + Assert.assertEquals(maxLifetime, + cs.getMaximumApplicationLifetime("default")); + + maxLifetime = -1; + defaultLifetime = -1; + // test for default values + cs = setUpCSQueue(maxLifetime, defaultLifetime); + Assert.assertEquals(100, cs.checkAndGetApplicationLifetime("default", 100)); + Assert.assertEquals(defaultLifetime, + 
cs.checkAndGetApplicationLifetime("default", -1)); + Assert.assertEquals(0, cs.checkAndGetApplicationLifetime("default", 0)); + Assert.assertEquals(maxLifetime, + cs.getMaximumApplicationLifetime("default")); + + maxLifetime = 10; + defaultLifetime = 10; + cs = setUpCSQueue(maxLifetime, defaultLifetime); + Assert.assertEquals(maxLifetime, + cs.checkAndGetApplicationLifetime("default", 100)); + Assert.assertEquals(defaultLifetime, + cs.checkAndGetApplicationLifetime("default", -1)); + Assert.assertEquals(defaultLifetime, + cs.checkAndGetApplicationLifetime("default", 0)); + Assert.assertEquals(maxLifetime, + cs.getMaximumApplicationLifetime("default")); + + maxLifetime = 0; + defaultLifetime = 0; + cs = setUpCSQueue(maxLifetime, defaultLifetime); + Assert.assertEquals(100, cs.checkAndGetApplicationLifetime("default", 100)); + Assert.assertEquals(-1, cs.checkAndGetApplicationLifetime("default", -1)); + Assert.assertEquals(0, cs.checkAndGetApplicationLifetime("default", 0)); + + maxLifetime = 10; + defaultLifetime = -1; + cs = setUpCSQueue(maxLifetime, defaultLifetime); + Assert.assertEquals(maxLifetime, + cs.checkAndGetApplicationLifetime("default", 100)); + Assert.assertEquals(maxLifetime, + cs.checkAndGetApplicationLifetime("default", -1)); + Assert.assertEquals(maxLifetime, + cs.checkAndGetApplicationLifetime("default", 0)); + + maxLifetime = 5; + defaultLifetime = 10; + try { + setUpCSQueue(maxLifetime, defaultLifetime); + Assert.fail("Expected to fails since maxLifetime < defaultLifetime."); + } catch (YarnRuntimeException ye) { + Assert.assertTrue( + ye.getMessage().contains("can't exceed maximum lifetime")); + } + } + + private CapacityScheduler setUpCSQueue(long maxLifetime, + long defaultLifetime) { + CapacitySchedulerConfiguration csConf = + new CapacitySchedulerConfiguration(); + csConf.setQueues(CapacitySchedulerConfiguration.ROOT, + new String[] {"default"}); + csConf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".default", 100); + csConf.setMaximumLifetimePerQueue( + CapacitySchedulerConfiguration.ROOT + ".default", maxLifetime); + csConf.setDefaultLifetimePerQueue( + CapacitySchedulerConfiguration.ROOT + ".default", defaultLifetime); + + YarnConfiguration conf = new YarnConfiguration(csConf); + CapacityScheduler cs = new CapacityScheduler(); + + RMContext rmContext = TestUtils.getMockRMContext(); + cs.setConf(conf); + cs.setRMContext(rmContext); + cs.init(conf); + + return cs; + } + private void waitforNMRegistered(ResourceScheduler scheduler, int nodecount, int timesec) throws InterruptedException { long start = System.currentTimeMillis(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPerf.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPerf.java new file mode 100644 index 00000000000..a2ccf6e30e6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPerf.java @@ -0,0 +1,265 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.util.Time; +import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.junit.Assume; +import org.junit.Test; + +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; +import java.util.PriorityQueue; + +import static org.apache.hadoop.yarn.server.resourcemanager.resource.TestResourceProfiles.TEST_CONF_RESET_RESOURCE_TYPES; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestCapacitySchedulerPerf { + private final int GB = 1024; + + private String getResourceName(int idx) { + return "resource-" + idx; + } + + private void testUserLimitThroughputWithNumberOfResourceTypes( + int numOfResourceTypes) + throws Exception { + if (numOfResourceTypes > 2) { + // Initialize resource map + 
Map riMap = new HashMap<>(); + + // Initialize mandatory resources + riMap.put(ResourceInformation.MEMORY_URI, ResourceInformation.MEMORY_MB); + riMap.put(ResourceInformation.VCORES_URI, ResourceInformation.VCORES); + + for (int i = 2; i < numOfResourceTypes; i++) { + String resourceName = getResourceName(i); + riMap.put(resourceName, ResourceInformation + .newInstance(resourceName, "", 0, ResourceTypes.COUNTABLE, 0, + Integer.MAX_VALUE)); + } + + ResourceUtils.initializeResourcesFromResourceInformationMap(riMap); + } + + // Since this is more of a performance unit test, only run if + // RunUserLimitThroughput is set (-DRunUserLimitThroughput=true) + Assume.assumeTrue(Boolean.valueOf( + System.getProperty("RunCapacitySchedulerPerfTests"))); + + CapacitySchedulerConfiguration csconf = + new CapacitySchedulerConfiguration(); + csconf.setMaximumApplicationMasterResourcePerQueuePercent("root", 100.0f); + csconf.setMaximumAMResourcePercentPerPartition("root", "", 100.0f); + csconf.setMaximumApplicationMasterResourcePerQueuePercent("root.default", + 100.0f); + csconf.setMaximumAMResourcePercentPerPartition("root.default", "", 100.0f); + csconf.setResourceComparator(DominantResourceCalculator.class); + + YarnConfiguration conf = new YarnConfiguration(csconf); + // Don't reset resource types since we have already configured resource types + conf.setBoolean(TEST_CONF_RESET_RESOURCE_TYPES, false); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + + MockRM rm = new MockRM(conf); + rm.start(); + + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + LeafQueue qb = (LeafQueue)cs.getQueue("default"); + + // For now make user limit large so we can activate all applications + qb.setUserLimitFactor((float)100.0); + qb.setupConfigurableCapacities(); + + SchedulerEvent addAppEvent; + SchedulerEvent addAttemptEvent; + Container container = mock(Container.class); + ApplicationSubmissionContext submissionContext = + mock(ApplicationSubmissionContext.class); + + final int appCount = 100; + ApplicationId[] appids = new ApplicationId[appCount]; + RMAppAttemptImpl[] attempts = new RMAppAttemptImpl[appCount]; + ApplicationAttemptId[] appAttemptIds = new ApplicationAttemptId[appCount]; + RMAppImpl[] apps = new RMAppImpl[appCount]; + RMAppAttemptMetrics[] attemptMetrics = new RMAppAttemptMetrics[appCount]; + for (int i=0; i 2) { + for (int i = 2; i < numOfResourceTypes; i++) { + nodeResource.setResourceValue(getResourceName(i), 10); + } + } + + RMNode node = MockNodes.newNodeInfo(0, nodeResource, 1, "127.0.0.1"); + cs.handle(new NodeAddedSchedulerEvent(node)); + + RMNode node2 = MockNodes.newNodeInfo(0, nodeResource, 1, "127.0.0.2"); + cs.handle(new NodeAddedSchedulerEvent(node2)); + + Priority u0Priority = TestUtils.createMockPriority(1); + RecordFactory recordFactory = + RecordFactoryProvider.getRecordFactory(null); + + FiCaSchedulerApp[] fiCaApps = new FiCaSchedulerApp[appCount]; + for (int i=0;i 2) { + for (int j = 2; j < numOfResourceTypes; j++) { + resourceRequest.getCapability().setResourceValue(getResourceName(j), + 10); + } + } + + // allocate container for app2 with 1GB memory and 1 vcore + fiCaApps[i].updateResourceRequests( + Collections.singletonList(resourceRequest)); + } + // Now force everything to be over user limit + qb.setUserLimitFactor((float)0.0); + + // Quiet the loggers while measuring throughput + for (Enumeration loggers = LogManager.getCurrentLoggers(); + loggers.hasMoreElements(); ) { + Logger logger = (Logger) 
loggers.nextElement(); + logger.setLevel(Level.WARN); + } + final int topn = 20; + final int iterations = 2000000; + final int printInterval = 20000; + final float numerator = 1000.0f * printInterval; + PriorityQueue queue = new PriorityQueue<>(topn, + Collections.reverseOrder()); + + long n = Time.monotonicNow(); + long timespent = 0; + for (int i = 0; i < iterations; i+=2) { + if (i > 0 && i % printInterval == 0){ + long ts = (Time.monotonicNow() - n); + if (queue.size() < topn) { + queue.offer(ts); + } else { + Long last = queue.peek(); + if (last > ts) { + queue.poll(); + queue.offer(ts); + } + } + System.out.println(i + " " + (numerator / ts)); + n= Time.monotonicNow(); + } + cs.handle(new NodeUpdateSchedulerEvent(node)); + cs.handle(new NodeUpdateSchedulerEvent(node2)); + } + timespent=0; + int entries = queue.size(); + while(queue.size() > 0){ + long l = queue.poll(); + timespent += l; + } + System.out.println( + "#ResourceTypes = " + numOfResourceTypes + ". Avg of fastest " + entries + + ": " + numerator / (timespent / entries)); + rm.stop(); + } + + @Test(timeout = 300000) + public void testUserLimitThroughputForTwoResources() throws Exception { + testUserLimitThroughputWithNumberOfResourceTypes(2); + } + + @Test(timeout = 300000) + public void testUserLimitThroughputForThreeResources() throws Exception { + testUserLimitThroughputWithNumberOfResourceTypes(3); + } + + @Test(timeout = 300000) + public void testUserLimitThroughputForFourResources() throws Exception { + testUserLimitThroughputWithNumberOfResourceTypes(4); + } + + @Test(timeout = 300000) + public void testUserLimitThroughputForFiveResources() throws Exception { + testUserLimitThroughputWithNumberOfResourceTypes(5); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index dd6b25b78c8..906febfde74 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.security.SecurityUtilTestHelper; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.records.Container; @@ -887,4 +888,53 @@ public void testQueuePriorityOrdering() throws Exception { rm1.close(); } + + + + @Test(timeout = 60000) + public void testUserLimitAllocationMultipleContainers() throws Exception { + CapacitySchedulerConfiguration newConf = + (CapacitySchedulerConfiguration) TestUtils + .getConfigurationWithMultipleQueues(conf); + newConf.setUserLimit("root.c", 50); + MockRM rm1 = new MockRM(newConf); + + rm1.getRMContext().setNodeLabelManager(mgr); + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 1000 * GB); + + // launch app from 1st user to queue C, AM container should be launched in nm1 + 
RMApp app1 = rm1.submitApp(2 * GB, "app", "user1", null, "c"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + // launch app from 2nd user to queue C, AM container should be launched in nm1 + RMApp app2 = rm1.submitApp(2 * GB, "app", "user2", null, "c"); + MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1); + + // Each application asks 1000 * 5GB containers + am1.allocate("*", 5 * GB, 1000, null); + am1.allocate("h1", 5 * GB, 1000, null); + am1.allocate(NetworkTopology.DEFAULT_RACK, 5 * GB, 1000, null); + + // Each application asks 1000 * 5GB containers + am2.allocate("*", 5 * GB, 1000, null); + am2.allocate("h1", 5 * GB, 1000, null); + am2.allocate(NetworkTopology.DEFAULT_RACK, 5 * GB, 1000, null); + + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + + FiCaSchedulerApp schedulerApp1 = + cs.getApplicationAttempt(am1.getApplicationAttemptId()); + FiCaSchedulerApp schedulerApp2 = + cs.getApplicationAttempt(am2.getApplicationAttemptId()); + + // container will be allocated to am1 + // App1 will get 2 container allocated (plus AM container) + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + Assert.assertEquals(101, schedulerApp1.getLiveContainers().size()); + Assert.assertEquals(100, schedulerApp2.getLiveContainers().size()); + + rm1.close(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index d45f756a2e9..a32352b3af2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -1252,7 +1252,7 @@ public void testComputeUserLimitAndSetHeadroom() throws IOException { //app4 is user 0 //maxqueue 16G, userlimit 7G, used 8G, headroom 5G //(8G used is 6G from this test case - app4, 2 from last test case, app_1) - assertEquals(0*GB, app_4.getHeadroom().getMemorySize()); + assertEquals(1*GB, app_4.getHeadroom().getMemorySize()); } @Test @@ -1436,7 +1436,7 @@ public void testHeadroomWithMaxCap() throws Exception { assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); // TODO, fix headroom in the future patch - assertEquals(0*GB, app_0.getHeadroom().getMemorySize()); + assertEquals(1*GB, app_0.getHeadroom().getMemorySize()); // User limit = 2G, 2 in use assertEquals(0*GB, app_1.getHeadroom().getMemorySize()); // the application is not yet active @@ -1449,8 +1449,8 @@ public void testHeadroomWithMaxCap() throws Exception { assertEquals(3*GB, a.getUsedResources().getMemorySize()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(0*GB, app_0.getHeadroom().getMemorySize()); // 4G - 3G - assertEquals(0*GB, app_1.getHeadroom().getMemorySize()); // 4G - 3G + assertEquals(1*GB, app_0.getHeadroom().getMemorySize()); // 4G - 3G + assertEquals(1*GB, 
app_1.getHeadroom().getMemorySize()); // 4G - 3G // Submit requests for app_1 and set max-cap a.setMaxCapacity(.1f); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java index 5e6548bc80e..32f022f7156 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java @@ -896,8 +896,7 @@ public void testGetAppToUnreserve() throws Exception { String host_1 = "host_1"; FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8 * GB); - - Resource clusterResource = Resources.createResource(2 * 8 * GB); + // Setup resource-requests Priority p = TestUtils.createMockPriority(5); @@ -933,28 +932,27 @@ public void testGetAppToUnreserve() throws Exception { node_0.getNodeID(), "user", rmContext); // no reserved containers - NodeId unreserveId = - app_0.getNodeIdToUnreserve(priorityMap, capability, - cs.getResourceCalculator(), clusterResource); + NodeId unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, + cs.getResourceCalculator()); assertEquals(null, unreserveId); // no reserved containers - reserve then unreserve app_0.reserve(node_0, priorityMap, rmContainer_1, container_1); app_0.unreserve(priorityMap, node_0, rmContainer_1); unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, - cs.getResourceCalculator(), clusterResource); + cs.getResourceCalculator()); assertEquals(null, unreserveId); // no container large enough is reserved app_0.reserve(node_0, priorityMap, rmContainer_1, container_1); unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, - cs.getResourceCalculator(), clusterResource); + cs.getResourceCalculator()); assertEquals(null, unreserveId); // reserve one that is now large enough app_0.reserve(node_1, priorityMap, rmContainer, container); unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, - cs.getResourceCalculator(), clusterResource); + cs.getResourceCalculator()); assertEquals(node_1.getNodeID(), unreserveId); } @@ -1001,16 +999,14 @@ public void testFindNodeToUnreserve() throws Exception { node_1.getNodeID(), "user", rmContext); // nothing reserved - RMContainer toUnreserveContainer = - app_0.findNodeToUnreserve(csContext.getClusterResource(), node_1, + RMContainer toUnreserveContainer = app_0.findNodeToUnreserve(node_1, priorityMap, capability); assertTrue(toUnreserveContainer == null); // reserved but scheduler doesn't know about that node. 
app_0.reserve(node_1, priorityMap, rmContainer, container); node_1.reserveResource(app_0, priorityMap, rmContainer); - toUnreserveContainer = - app_0.findNodeToUnreserve(csContext.getClusterResource(), node_1, + toUnreserveContainer = app_0.findNodeToUnreserve(node_1, priorityMap, capability); assertTrue(toUnreserveContainer == null); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java index 36ff85e5a46..03332b25e2c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java @@ -20,8 +20,6 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.resource.Resources; @@ -33,7 +31,7 @@ public class FakeSchedulable implements Schedulable { private Resource minShare; private Resource maxShare; private Resource fairShare; - private ResourceWeights weights; + private float weights; private Priority priority; private long startTime; @@ -49,28 +47,31 @@ public FakeSchedulable(int minShare, int maxShare) { this(minShare, maxShare, 1, 0, 0, 0); } - public FakeSchedulable(int minShare, double memoryWeight) { + public FakeSchedulable(int minShare, float memoryWeight) { this(minShare, Integer.MAX_VALUE, memoryWeight, 0, 0, 0); } - public FakeSchedulable(int minShare, int maxShare, double memoryWeight) { + public FakeSchedulable(int minShare, int maxShare, float memoryWeight) { this(minShare, maxShare, memoryWeight, 0, 0, 0); } - public FakeSchedulable(int minShare, int maxShare, double weight, int fairShare, int usage, - long startTime) { - this(Resources.createResource(minShare, 0), Resources.createResource(maxShare, 0), - new ResourceWeights((float)weight), Resources.createResource(fairShare, 0), + public FakeSchedulable(int minShare, int maxShare, float weight, + int fairShare, int usage, long startTime) { + this(Resources.createResource(minShare, 0), + Resources.createResource(maxShare, 0), + weight, Resources.createResource(fairShare, 0), Resources.createResource(usage, 0), startTime); } - public FakeSchedulable(Resource minShare, ResourceWeights weights) { - this(minShare, Resources.createResource(Integer.MAX_VALUE, Integer.MAX_VALUE), - weights, Resources.createResource(0, 0), Resources.createResource(0, 0), 0); + public FakeSchedulable(Resource minShare, float weights) { + this(minShare, + Resources.createResource(Integer.MAX_VALUE, Integer.MAX_VALUE), + weights, Resources.createResource(0, 0), + Resources.createResource(0, 0), 0); } public FakeSchedulable(Resource minShare, Resource maxShare, - ResourceWeights weight, Resource fairShare, Resource usage, long startTime) { + float weight, Resource fairShare, Resource usage, long startTime) { this.minShare = minShare; 
this.maxShare = maxShare; this.weights = weight; @@ -121,7 +122,7 @@ public long getStartTime() { } @Override - public ResourceWeights getWeights() { + public float getWeight() { return weights; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestComputeFairShares.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestComputeFairShares.java index 4f3ccb2acd4..c3bcb3b2179 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestComputeFairShares.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestComputeFairShares.java @@ -20,12 +20,11 @@ import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.junit.Assert; import org.apache.hadoop.yarn.util.resource.Resources; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.ComputeFairShares; import org.junit.Before; import org.junit.Test; @@ -52,7 +51,7 @@ public void testEqualSharing() { scheds.add(new FakeSchedulable()); scheds.add(new FakeSchedulable()); ComputeFairShares.computeShares(scheds, - Resources.createResource(40), ResourceType.MEMORY); + Resources.createResource(40), ResourceInformation.MEMORY_MB.getName()); verifyMemoryShares(10, 10, 10, 10); } @@ -70,7 +69,7 @@ public void testLowMaxShares() { scheds.add(new FakeSchedulable(0, 11)); scheds.add(new FakeSchedulable(0, 3)); ComputeFairShares.computeShares(scheds, - Resources.createResource(40), ResourceType.MEMORY); + Resources.createResource(40), ResourceInformation.MEMORY_MB.getName()); verifyMemoryShares(13, 13, 11, 3); } @@ -90,7 +89,7 @@ public void testMinShares() { scheds.add(new FakeSchedulable(0)); scheds.add(new FakeSchedulable(2)); ComputeFairShares.computeShares(scheds, - Resources.createResource(40), ResourceType.MEMORY); + Resources.createResource(40), ResourceInformation.MEMORY_MB.getName()); verifyMemoryShares(20, 18, 0, 2); } @@ -100,12 +99,12 @@ public void testMinShares() { */ @Test public void testWeightedSharing() { - scheds.add(new FakeSchedulable(0, 2.0)); - scheds.add(new FakeSchedulable(0, 1.0)); - scheds.add(new FakeSchedulable(0, 1.0)); - scheds.add(new FakeSchedulable(0, 0.5)); + scheds.add(new FakeSchedulable(0, 2.0f)); + scheds.add(new FakeSchedulable(0, 1.0f)); + scheds.add(new FakeSchedulable(0, 1.0f)); + scheds.add(new FakeSchedulable(0, 0.5f)); ComputeFairShares.computeShares(scheds, - Resources.createResource(45), ResourceType.MEMORY); + Resources.createResource(45), ResourceInformation.MEMORY_MB.getName()); verifyMemoryShares(20, 10, 10, 5); } @@ -118,12 +117,12 @@ public void testWeightedSharing() { */ @Test public void testWeightedSharingWithMaxShares() { - scheds.add(new FakeSchedulable(0, 10, 2.0)); - scheds.add(new FakeSchedulable(0, 11, 1.0)); - scheds.add(new FakeSchedulable(0, 30, 1.0)); - scheds.add(new FakeSchedulable(0, 20, 0.5)); + scheds.add(new FakeSchedulable(0, 10, 2.0f)); + scheds.add(new FakeSchedulable(0, 
11, 1.0f)); + scheds.add(new FakeSchedulable(0, 30, 1.0f)); + scheds.add(new FakeSchedulable(0, 20, 0.5f)); ComputeFairShares.computeShares(scheds, - Resources.createResource(45), ResourceType.MEMORY); + Resources.createResource(45), ResourceInformation.MEMORY_MB.getName()); verifyMemoryShares(10, 11, 16, 8); } @@ -137,12 +136,12 @@ public void testWeightedSharingWithMaxShares() { */ @Test public void testWeightedSharingWithMinShares() { - scheds.add(new FakeSchedulable(20, 2.0)); - scheds.add(new FakeSchedulable(0, 1.0)); - scheds.add(new FakeSchedulable(5, 1.0)); - scheds.add(new FakeSchedulable(15, 0.5)); + scheds.add(new FakeSchedulable(20, 2.0f)); + scheds.add(new FakeSchedulable(0, 1.0f)); + scheds.add(new FakeSchedulable(5, 1.0f)); + scheds.add(new FakeSchedulable(15, 0.5f)); ComputeFairShares.computeShares(scheds, - Resources.createResource(45), ResourceType.MEMORY); + Resources.createResource(45), ResourceInformation.MEMORY_MB.getName()); verifyMemoryShares(20, 5, 5, 15); } @@ -158,7 +157,8 @@ public void testLargeShares() { scheds.add(new FakeSchedulable()); scheds.add(new FakeSchedulable()); ComputeFairShares.computeShares(scheds, - Resources.createResource(40 * million), ResourceType.MEMORY); + Resources.createResource(40 * million), + ResourceInformation.MEMORY_MB.getName()); verifyMemoryShares(10 * million, 10 * million, 10 * million, 10 * million); } @@ -168,7 +168,7 @@ public void testLargeShares() { @Test public void testEmptyList() { ComputeFairShares.computeShares(scheds, - Resources.createResource(40), ResourceType.MEMORY); + Resources.createResource(40), ResourceInformation.MEMORY_MB.getName()); verifyMemoryShares(); } @@ -177,16 +177,12 @@ public void testEmptyList() { */ @Test public void testCPU() { - scheds.add(new FakeSchedulable(Resources.createResource(0, 20), - new ResourceWeights(2.0f))); - scheds.add(new FakeSchedulable(Resources.createResource(0, 0), - new ResourceWeights(1.0f))); - scheds.add(new FakeSchedulable(Resources.createResource(0, 5), - new ResourceWeights(1.0f))); - scheds.add(new FakeSchedulable(Resources.createResource(0, 15), - new ResourceWeights(0.5f))); + scheds.add(new FakeSchedulable(Resources.createResource(0, 20), 2.0f)); + scheds.add(new FakeSchedulable(Resources.createResource(0, 0), 1.0f)); + scheds.add(new FakeSchedulable(Resources.createResource(0, 5), 1.0f)); + scheds.add(new FakeSchedulable(Resources.createResource(0, 15), 0.5f)); ComputeFairShares.computeShares(scheds, - Resources.createResource(0, 45), ResourceType.CPU); + Resources.createResource(0, 45), ResourceInformation.VCORES.getName()); verifyCPUShares(20, 5, 5, 15); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java index 9efa83d99f6..854a65c2831 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java @@ -285,24 +285,19 @@ public void testInterruptedException() } @Test - public void 
testThreadLifeCycle() throws InterruptedException { + public void testSchedulerThreadLifeCycle() throws InterruptedException { scheduler.start(); - Thread updateThread = scheduler.updateThread; Thread schedulingThread = scheduler.schedulingThread; - - assertTrue(updateThread.isAlive()); assertTrue(schedulingThread.isAlive()); - scheduler.stop(); int numRetries = 100; - while (numRetries-- > 0 && - (updateThread.isAlive() || schedulingThread.isAlive())) { + while (numRetries-- > 0 && schedulingThread.isAlive()) { Thread.sleep(50); } - assertNotEquals("One of the threads is still alive", 0, numRetries); + assertNotEquals("The Scheduling thread is still alive", 0, numRetries); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 941c215df4c..0ef4d7ba5ea 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -79,7 +79,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; @@ -113,6 +112,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.ControlledClock; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.After; import org.junit.Assert; @@ -212,6 +212,7 @@ public void testLoadConfigurationOnInitialize() throws IOException { conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512); conf.setInt(FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 128); + ResourceUtils.resetResourceTypes(conf); scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); @@ -240,6 +241,7 @@ public void testNonMinZeroResourcesSettings() throws IOException { FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 512); conf.setInt( FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, 2); + ResourceUtils.resetResourceTypes(conf); scheduler.init(conf); scheduler.reinitialize(conf, null); Assert.assertEquals(256, scheduler.getMinimumResourceCapability().getMemorySize()); @@ -257,6 +259,7 @@ public void testMinZeroResourcesSettings() throws IOException { FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 512); conf.setInt( FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, 2); + ResourceUtils.resetResourceTypes(conf); scheduler.init(conf); scheduler.reinitialize(conf, null); 
Assert.assertEquals(0, scheduler.getMinimumResourceCapability().getMemorySize()); @@ -1980,7 +1983,7 @@ public void testFairShareAndWeightsInNestedUserQueueRule() throws Exception { // assert that the steady fair share is 1/4th node1's capacity assertEquals(capacity / 4, leaf.getSteadyFairShare().getMemorySize()); // assert weights are equal for both the user queues - assertEquals(1.0, leaf.getWeights().getWeight(ResourceType.MEMORY), 0); + assertEquals(1.0, leaf.getWeight(), 0); } } } @@ -5271,7 +5274,7 @@ public void testDumpState() throws IOException { child1.updateDemand(); String childQueueString = "{Name: root.parent.child1," - + " Weight: ," + + " Weight: 1.0," + " Policy: fair," + " FairShare: ," + " SteadyFairShare: ," @@ -5288,14 +5291,15 @@ public void testDumpState() throws IOException { + " LastTimeAtMinShare: " + clock.getTime() + "}"; - assertTrue(child1.dumpState().equals(childQueueString)); + assertEquals("Unexpected state dump string", + childQueueString, child1.dumpState()); FSParentQueue parent = scheduler.getQueueManager().getParentQueue("parent", false); parent.setMaxShare(resource); parent.updateDemand(); String parentQueueString = "{Name: root.parent," - + " Weight: ," + + " Weight: 1.0," + " Policy: fair," + " FairShare: ," + " SteadyFairShare: ," @@ -5306,7 +5310,7 @@ public void testDumpState() throws IOException { + " MaxAMShare: 0.5," + " Runnable: 0}"; - assertTrue(parent.dumpState().equals( - parentQueueString + ", " + childQueueString)); + assertEquals("Unexpected state dump string", + parentQueueString + ", " + childQueueString, parent.dumpState()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestSchedulingPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestSchedulingPolicy.java index 3a16454c10a..b016c1b4fb8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestSchedulingPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestSchedulingPolicy.java @@ -30,7 +30,6 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FairSharePolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy; @@ -134,11 +133,7 @@ private class FairShareComparatorTester { Resource.newInstance(0, 1), Resource.newInstance(2, 1), Resource.newInstance(4, 1) }; - private ResourceWeights[] weightsCollection = { - new ResourceWeights(0.0f), new ResourceWeights(1.0f), - new ResourceWeights(2.0f) }; - - + private float[] weightsCollection = {0.0f, 1.0f, 2.0f}; public FairShareComparatorTester( Comparator fairShareComparator) { @@ -225,10 +220,10 @@ private class MockSchedulable implements Schedulable { private String name; private long startTime; private Resource usage; - private ResourceWeights 
weights; + private float weights; public MockSchedulable(Resource minShare, Resource demand, String name, - long startTime, Resource usage, ResourceWeights weights) { + long startTime, Resource usage, float weights) { this.minShare = minShare; this.demand = demand; this.name = name; @@ -258,7 +253,7 @@ public Resource getMinShare() { } @Override - public ResourceWeights getWeights() { + public float getWeight() { return weights; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/TestDominantResourceFairnessPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/TestDominantResourceFairnessPolicy.java index 3719e2aee08..097558feb18 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/TestDominantResourceFairnessPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/TestDominantResourceFairnessPolicy.java @@ -23,15 +23,22 @@ import static org.mockito.Mockito.when; import java.util.Comparator; +import java.util.Map; +import org.apache.curator.shaded.com.google.common.base.Joiner; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; -import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; +import org.apache.hadoop.yarn.api.records.ResourceInformation; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSContext; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FakeSchedulable; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy.DominantResourceFairnessComparator; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.resource.Resources; +import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; /** @@ -39,10 +46,15 @@ * container before sched2 */ public class TestDominantResourceFairnessPolicy { + @BeforeClass + public static void setup() { + addResources("test"); + } private Comparator createComparator(int clusterMem, int clusterCpu) { - DominantResourceFairnessPolicy policy = new DominantResourceFairnessPolicy(); + DominantResourceFairnessPolicy policy = + new DominantResourceFairnessPolicy(); FSContext fsContext = mock(FSContext.class); when(fsContext.getClusterResource()). 
thenReturn(Resources.createResource(clusterMem, clusterCpu)); @@ -51,23 +63,23 @@ private Comparator createComparator(int clusterMem, } private Schedulable createSchedulable(int memUsage, int cpuUsage) { - return createSchedulable(memUsage, cpuUsage, ResourceWeights.NEUTRAL, 0, 0); + return createSchedulable(memUsage, cpuUsage, 1.0f, 0, 0); } private Schedulable createSchedulable(int memUsage, int cpuUsage, int minMemShare, int minCpuShare) { - return createSchedulable(memUsage, cpuUsage, ResourceWeights.NEUTRAL, + return createSchedulable(memUsage, cpuUsage, 1.0f, minMemShare, minCpuShare); } private Schedulable createSchedulable(int memUsage, int cpuUsage, - ResourceWeights weights) { + float weights) { return createSchedulable(memUsage, cpuUsage, weights, 0, 0); } private Schedulable createSchedulable(int memUsage, int cpuUsage, - ResourceWeights weights, int minMemShare, int minCpuShare) { + float weights, int minMemShare, int minCpuShare) { Resource usage = BuilderUtils.newResource(memUsage, cpuUsage); Resource minShare = BuilderUtils.newResource(minMemShare, minCpuShare); return new FakeSchedulable(minShare, @@ -77,94 +89,260 @@ private Schedulable createSchedulable(int memUsage, int cpuUsage, @Test public void testSameDominantResource() { - assertTrue(createComparator(8000, 4).compare( - createSchedulable(1000, 1), - createSchedulable(2000, 1)) < 0); + Comparator c = createComparator(8000, 4); + Schedulable s1 = createSchedulable(1000, 1); + Schedulable s2 = createSchedulable(2000, 1); + + assertTrue("Comparison didn't return a value less than 0", + c.compare(s1, s2) < 0); } @Test public void testDifferentDominantResource() { - assertTrue(createComparator(8000, 8).compare( - createSchedulable(4000, 3), - createSchedulable(2000, 5)) < 0); + Comparator c = createComparator(8000, 8); + Schedulable s1 = createSchedulable(4000, 3); + Schedulable s2 = createSchedulable(2000, 5); + + assertTrue("Comparison didn't return a value less than 0", + c.compare(s1, s2) < 0); } @Test public void testOneIsNeedy() { - assertTrue(createComparator(8000, 8).compare( - createSchedulable(2000, 5, 0, 6), - createSchedulable(4000, 3, 0, 0)) < 0); + Comparator c = createComparator(8000, 8); + Schedulable s1 = createSchedulable(2000, 5, 0, 6); + Schedulable s2 = createSchedulable(4000, 3, 0, 0); + + assertTrue("Comparison didn't return a value less than 0", + c.compare(s1, s2) < 0); } @Test public void testBothAreNeedy() { - assertTrue(createComparator(8000, 100).compare( - // dominant share is 2000/8000 - createSchedulable(2000, 5), - // dominant share is 4000/8000 - createSchedulable(4000, 3)) < 0); - assertTrue(createComparator(8000, 100).compare( - // dominant min share is 2/3 - createSchedulable(2000, 5, 3000, 6), - // dominant min share is 4/5 - createSchedulable(4000, 3, 5000, 4)) < 0); + Comparator c = createComparator(8000, 100); + // dominant share is 2000/8000 + Schedulable s1 = createSchedulable(2000, 5); + // dominant share is 4000/8000 + Schedulable s2 = createSchedulable(4000, 3); + + assertTrue("Comparison didn't return a value less than 0", + c.compare(s1, s2) < 0); + + // dominant min share is 2/3 + s1 = createSchedulable(2000, 5, 3000, 6); + // dominant min share is 4/5 + s2 = createSchedulable(4000, 3, 5000, 4); + + assertTrue("Comparison didn't return a value less than 0", + c.compare(s1, s2) < 0); } @Test public void testEvenWeightsSameDominantResource() { assertTrue(createComparator(8000, 8).compare( - createSchedulable(3000, 1, new ResourceWeights(2.0f)), + createSchedulable(3000, 1, 
2.0f), createSchedulable(2000, 1)) < 0); assertTrue(createComparator(8000, 8).compare( - createSchedulable(1000, 3, new ResourceWeights(2.0f)), + createSchedulable(1000, 3, 2.0f), createSchedulable(1000, 2)) < 0); } @Test public void testEvenWeightsDifferentDominantResource() { assertTrue(createComparator(8000, 8).compare( - createSchedulable(1000, 3, new ResourceWeights(2.0f)), + createSchedulable(1000, 3, 2.0f), createSchedulable(2000, 1)) < 0); assertTrue(createComparator(8000, 8).compare( - createSchedulable(3000, 1, new ResourceWeights(2.0f)), + createSchedulable(3000, 1, 2.0f), createSchedulable(1000, 2)) < 0); } @Test - public void testUnevenWeightsSameDominantResource() { - assertTrue(createComparator(8000, 8).compare( - createSchedulable(3000, 1, new ResourceWeights(2.0f, 1.0f)), - createSchedulable(2000, 1)) < 0); - assertTrue(createComparator(8000, 8).compare( - createSchedulable(1000, 3, new ResourceWeights(1.0f, 2.0f)), - createSchedulable(1000, 2)) < 0); + public void testSortShares() { + float[][] ratios1 = {{0.3f, 2.0f}, {0.2f, 1.0f}, {0.4f, 0.1f}}; + float[][] ratios2 = {{0.2f, 9.0f}, {0.3f, 2.0f}, {0.25f, 0.1f}}; + float[][] expected1 = {{0.4f, 0.1f}, {0.3f, 2.0f}, {0.2f, 1.0f}}; + float[][] expected2 = {{0.3f, 2.0f}, {0.25f, 0.1f}, {0.2f, 9.0f}}; + DominantResourceFairnessComparator comparator = + new DominantResourceFairnessComparator(); + + comparator.sortRatios(ratios1, ratios2); + + for (int i = 0; i < ratios1.length; i++) { + Assert.assertArrayEquals("The shares array was not sorted into the " + + "expected order: incorrect inner array encountered", + expected1[i], ratios1[i], 0.00001f); + Assert.assertArrayEquals("The shares array was not sorted into the " + + "expected order: incorrect inner array encountered", + expected2[i], ratios2[i], 0.00001f); + } } - + @Test - public void testUnevenWeightsDifferentDominantResource() { - assertTrue(createComparator(8000, 8).compare( - createSchedulable(1000, 3, new ResourceWeights(1.0f, 2.0f)), - createSchedulable(2000, 1)) < 0); - assertTrue(createComparator(8000, 8).compare( - createSchedulable(3000, 1, new ResourceWeights(2.0f, 1.0f)), - createSchedulable(1000, 2)) < 0); - } - - @Test - public void testCalculateShares() { + public void testCalculateClusterAndFairRatios() { + Map index = ResourceUtils.getResourceTypeIndex(); Resource used = Resources.createResource(10, 5); Resource capacity = Resources.createResource(100, 10); - ResourceType[] resourceOrder = new ResourceType[2]; - ResourceWeights shares = new ResourceWeights(); - DominantResourceFairnessPolicy.DominantResourceFairnessComparator comparator = - new DominantResourceFairnessPolicy.DominantResourceFairnessComparator(); - comparator.calculateShares(used, capacity, shares, resourceOrder, - ResourceWeights.NEUTRAL); - - assertEquals(.1, shares.getWeight(ResourceType.MEMORY), .00001); - assertEquals(.5, shares.getWeight(ResourceType.CPU), .00001); - assertEquals(ResourceType.CPU, resourceOrder[0]); - assertEquals(ResourceType.MEMORY, resourceOrder[1]); + float[][] shares = new float[3][2]; + DominantResourceFairnessComparator comparator = + new DominantResourceFairnessComparator(); + + used.setResourceValue("test", 2L); + capacity.setResourceValue("test", 5L); + + int dominant = comparator.calculateClusterAndFairRatios(used, capacity, + shares, 1.0f); + + assertEquals("Calculated usage ratio for memory (10MB out of 100MB) is " + + "incorrect", 0.1, + shares[index.get(ResourceInformation.MEMORY_MB.getName())][0], .00001); + assertEquals("Calculated usage ratio for 
vcores (5 out of 10) is " + + "incorrect", 0.5, + shares[index.get(ResourceInformation.VCORES.getName())][0], .00001); + assertEquals("Calculated usage ratio for test resource (2 out of 5) is " + + "incorrect", 0.4, shares[index.get("test")][0], .00001); + assertEquals("The wrong dominant resource index was returned", + index.get(ResourceInformation.VCORES.getName()).intValue(), + dominant); + } + + @Test + public void testCalculateMinShareRatios() { + Map index = ResourceUtils.getResourceTypeIndex(); + Resource used = Resources.createResource(10, 5); + Resource minShares = Resources.createResource(5, 10); + float[][] ratios = new float[3][3]; + DominantResourceFairnessComparator comparator = + new DominantResourceFairnessComparator(); + + used.setResourceValue("test", 2L); + minShares.setResourceValue("test", 0L); + + comparator.calculateMinShareRatios(used, minShares, ratios); + + assertEquals("Calculated min share ratio for memory (10MB out of 5MB) is " + + "incorrect", 2.0, + ratios[index.get(ResourceInformation.MEMORY_MB.getName())][2], .00001f); + assertEquals("Calculated min share ratio for vcores (5 out of 10) is " + + "incorrect", 0.5, + ratios[index.get(ResourceInformation.VCORES.getName())][2], .00001f); + assertEquals("Calculated min share ratio for test resource (0 out of 5) is " + + "incorrect", Float.POSITIVE_INFINITY, ratios[index.get("test")][2], + 0.00001f); + } + + @Test + public void testCompareShares() { + float[][] ratios1 = { + {0.4f, 0.1f, 2.0f}, + {0.3f, 2.0f, 0.1f}, + {0.2f, 1.0f, 9.0f} + }; + float[][] ratios2 = { + {0.3f, 2.0f, 1.0f}, + {0.2f, 0.1f, 0.5f}, + {0.2f, 1.0f, 2.0f} + }; + float[][] ratios3 = { + {0.3f, 2.0f, 1.0f}, + {0.2f, 0.1f, 2.0f}, + {0.1f, 2.0f, 1.0f} + }; + DominantResourceFairnessComparator comparator = + new DominantResourceFairnessComparator(); + + int ret = comparator.compareRatios(ratios1, ratios2, 0); + + assertEquals("Expected the first array to be larger because the first " + + "usage ratio element is larger", 1, ret); + + ret = comparator.compareRatios(ratios2, ratios1, 0); + + assertEquals("Expected the first array to be smaller because the first " + + "usage ratio element is smaller", -1, ret); + + ret = comparator.compareRatios(ratios1, ratios1, 0); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + ret = comparator.compareRatios(ratios2, ratios2, 0); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + ret = comparator.compareRatios(ratios3, ratios3, 0); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + ret = comparator.compareRatios(ratios2, ratios3, 0); + + assertEquals("Expected the first array to be larger because the last " + + "usage ratio element is larger, and all other elements are equal", + 1, ret); + + ret = comparator.compareRatios(ratios1, ratios2, 1); + + assertEquals("Expected the first array to be smaller because the first " + + "fair share ratio element is smaller", -1, ret); + + ret = comparator.compareRatios(ratios2, ratios1, 1); + + assertEquals("Expected the first array to be larger because the first " + + "fair share ratio element is larger", 1, ret); + + ret = comparator.compareRatios(ratios1, ratios1, 1); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + ret = comparator.compareRatios(ratios2, ratios2, 1); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + 
ret = comparator.compareRatios(ratios3, ratios3, 1); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + ret = comparator.compareRatios(ratios2, ratios3, 1); + + assertEquals("Expected the first array to be smaller because the last " + + "usage ratio element is smaller, and all other elements are equal", + -1, ret); + + ret = comparator.compareRatios(ratios1, ratios2, 2); + + assertEquals("Expected the first array to be larger because the first " + + "min share ratio element is larger", 1, ret); + + ret = comparator.compareRatios(ratios2, ratios1, 2); + + assertEquals("Expected the first array to be smaller because the first " + + "min share ratio element is smaller", -1, ret); + + ret = comparator.compareRatios(ratios1, ratios1, 2); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + ret = comparator.compareRatios(ratios2, ratios2, 2); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + ret = comparator.compareRatios(ratios3, ratios3, 2); + + assertEquals("Expected the arrays to be equal, since they're the same " + + "array", 0, ret); + + ret = comparator.compareRatios(ratios2, ratios3, 2); + + assertEquals("Expected the first array to be smaller because the second " + + "min share ratio element is smaller, and all the first elements are " + + "equal", -1, ret); } @Test @@ -183,4 +361,12 @@ public void testCompareSchedulablesWithClusterResourceChanges(){ assertTrue(createComparator(8000, 6) .compare(schedulable1, schedulable2) < 0); } + + private static void addResources(String... resources) { + Configuration conf = new Configuration(); + + // Add a third resource to the allowed set + conf.set(YarnConfiguration.RESOURCE_TYPES, Joiner.on(',').join(resources)); + ResourceUtils.resetResourceTypes(conf); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java index 37fc3b3474c..683173af709 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java @@ -21,6 +21,7 @@ import java.util.*; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; @@ -148,7 +149,10 @@ public void testSizeBasedWeightNotAffectAppActivation() throws Exception { // Define top-level queues String queuePath = CapacitySchedulerConfiguration.ROOT + ".default"; - csConf.setOrderingPolicy(queuePath, CapacitySchedulerConfiguration.FAIR_APP_ORDERING_POLICY); + csConf.set(YarnConfiguration.RM_SCHEDULER, + CapacityScheduler.class.getCanonicalName()); + csConf.setOrderingPolicy(queuePath, + 
CapacitySchedulerConfiguration.FAIR_APP_ORDERING_POLICY); csConf.setOrderingPolicyParameter(queuePath, FairOrderingPolicy.ENABLE_SIZE_BASED_WEIGHT, "true"); csConf.setMaximumApplicationMasterResourcePerQueuePercent(queuePath, 0.1f); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java index d4e7727ad5e..a0f4007cf8a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestClientToAMTokens.java @@ -86,6 +86,10 @@ public class TestClientToAMTokens extends ParameterizedSchedulerTestBase { private YarnConfiguration conf; + public TestClientToAMTokens(SchedulerType type) throws IOException { + super(type); + } + @Before public void setup() { conf = getConf(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java index a009bc07d47..f88ac8b4d20 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestAppPage.java @@ -22,8 +22,8 @@ import static org.mockito.Mockito.when; import java.io.IOException; +import java.util.HashMap; -import org.apache.hadoop.yarn.api.ApplicationBaseProtocol; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Resource; @@ -63,9 +63,10 @@ public void testAppBlockRenderWithNullCurrentAppAttempt() throws Exception { when(app.getStartTime()).thenReturn(0L); when(app.getFinishTime()).thenReturn(0L); when(app.createApplicationState()).thenReturn(YarnApplicationState.FAILED); - - RMAppMetrics appMetrics = new RMAppMetrics( - Resource.newInstance(0, 0), 0, 0, 0, 0, 0, 0); + + RMAppMetrics appMetrics = + new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, new HashMap<>(), + new HashMap<>()); when(app.getRMAppMetrics()).thenReturn(appMetrics); // initialize RM Context, and create RMApp, without creating RMAppAttempt @@ -80,15 +81,13 @@ public void configure(Binder binder) { try { ResourceManager rm = TestRMWebApp.mockRm(rmContext); binder.bind(ResourceManager.class).toInstance(rm); - binder.bind(ApplicationBaseProtocol.class).toInstance( - rm.getClientRMService()); } catch (IOException e) { throw new IllegalStateException(e); } } }); - AppBlock instance = injector.getInstance(AppBlock.class); + AppBlock instance = injector.getInstance(RMAppBlock.class); instance.set(YarnWebParams.APPLICATION_ID, APP_ID.toString()); instance.render(); } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java index 8a6504002a1..b1256083ef8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java @@ -32,7 +32,6 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.api.ApplicationBaseProtocol; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -103,8 +102,6 @@ public void configure(Binder binder) { try { ResourceManager mockRm = mockRm(3, 1, 2, 8*GiB); binder.bind(ResourceManager.class).toInstance(mockRm); - binder.bind(ApplicationBaseProtocol.class) - .toInstance(mockRm.getClientRMService()); } catch (IOException e) { throw new IllegalStateException(e); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java index 78fadef03e5..cea0088d334 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java @@ -24,7 +24,6 @@ import com.google.inject.Module; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.yarn.api.ApplicationBaseProtocol; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Resource; @@ -52,6 +51,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.concurrent.ConcurrentMap; @@ -75,8 +75,6 @@ public void configure(Binder binder) { mockRm(rmContext); binder.bind(ResourceManager.class).toInstance (mockRmWithFairScheduler); - binder.bind(ApplicationBaseProtocol.class).toInstance( - mockRmWithFairScheduler.getClientRMService()); } catch (IOException e) { throw new IllegalStateException(e); } @@ -115,9 +113,6 @@ public void configure(Binder binder) { mockRmWithApps(rmContext); binder.bind(ResourceManager.class).toInstance (mockRmWithFairScheduler); - binder.bind(ApplicationBaseProtocol.class).toInstance( - mockRmWithFairScheduler.getClientRMService()); - } catch (IOException e) { throw new IllegalStateException(e); } @@ -142,8 +137,8 @@ private static RMContext mockRMContext(List states) { MockRMApp app = new MockRMApp(i, i, state) { @Override public RMAppMetrics getRMAppMetrics() { - 
return new RMAppMetrics(Resource.newInstance(0, 0), - 0, 0, 0, 0, 0, 0); + return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, + new HashMap<>(), new HashMap<>()); } @Override public YarnApplicationState createApplicationState() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java index 1cbdec3a225..07f74a358e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java @@ -1528,7 +1528,7 @@ public void verifyAppsXML(NodeList nodes, RMApp app, boolean hasResourceReq) public void verifyAppInfo(JSONObject info, RMApp app, boolean hasResourceReqs) throws JSONException, Exception { - int expectedNumberOfElements = 36 + (hasResourceReqs ? 2 : 0); + int expectedNumberOfElements = 38 + (hasResourceReqs ? 2 : 0); String appNodeLabelExpression = null; String amNodeLabelExpression = null; if (app.getApplicationSubmissionContext() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRedirectionErrorPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRedirectionErrorPage.java index 408dc9bb88a..eb0a62d783c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRedirectionErrorPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRedirectionErrorPage.java @@ -21,7 +21,6 @@ import java.io.IOException; -import org.apache.hadoop.yarn.api.ApplicationBaseProtocol; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; @@ -52,8 +51,6 @@ public void configure(Binder binder) { try { ResourceManager rm = TestRMWebApp.mockRm(rmContext); binder.bind(ResourceManager.class).toInstance(rm); - binder.bind(ApplicationBaseProtocol.class).toInstance( - rm.getClientRMService()); } catch (IOException e) { throw new IllegalStateException(e); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-1.json b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-1.json new file mode 100644 index 00000000000..b6aca96bd0f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-1.json @@ -0,0 +1,10 @@ +{ + "minimum": { + "memoryMB" : 1024, + "vcores" : 1 + }, + "default" : { + "memoryMB" : 2048, + "vcores" : 2 + }, +} diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-2.json b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-2.json new file mode 100644 index 00000000000..d62a3116abe --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-2.json @@ -0,0 +1,10 @@ +{ + "minimum": { + "memoryMB" : 1024, + "vcores" : 1 + }, + "maximum" : { + "memoryMB": 4096, + "vcores" : 4 + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-3.json b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-3.json new file mode 100644 index 00000000000..9ee74dee611 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/illegal-profiles-3.json @@ -0,0 +1,10 @@ +{ + "default" : { + "memoryMB" : 2048, + "vcores" : 2 + }, + "maximum" : { + "memoryMB": 4096, + "vcores" : 4 + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/sample-profiles-1.json b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/sample-profiles-1.json new file mode 100644 index 00000000000..65b736078ba --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/sample-profiles-1.json @@ -0,0 +1,14 @@ +{ + "minimum": { + "memory-mb" : 1024, + "vcores" : 1 + }, + "default" : { + "memory-mb" : 2048, + "vcores" : 2 + }, + "maximum" : { + "memory-mb": 4096, + "vcores" : 4 + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/sample-profiles-2.json b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/sample-profiles-2.json new file mode 100644 index 00000000000..c2356710383 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/resources/profiles/sample-profiles-2.json @@ -0,0 +1,26 @@ +{ + "minimum": { + "memory-mb" : 1024, + "vcores" : 1 + }, + "default" : { + "memory-mb" : 2048, + "vcores" : 2 + }, + "maximum" : { + "memory-mb": 4096, + "vcores" : 4 + }, + "small" : { + "memory-mb": 1024, + "vcores": 1 + }, + "medium" : { + "memory-mb": 2048, + "vcores": 1 + }, + "large": { + "memory-mb" : 4096, + "vcores" : 4 + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/DefaultClientRequestInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/DefaultClientRequestInterceptor.java index 71de6b470e9..5ce4803f603 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/DefaultClientRequestInterceptor.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/DefaultClientRequestInterceptor.java @@ -27,6 +27,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -59,6 +63,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; @@ -303,6 +309,24 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( return clientRMProxy.updateApplicationTimeouts(request); } + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException { + return clientRMProxy.getResourceProfiles(request); + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + return clientRMProxy.getResourceProfile(request); + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + return clientRMProxy.getResourceTypeInfo(request); + } + @VisibleForTesting public void setRMClient(ApplicationClientProtocol clientRM) { this.clientRMProxy = clientRM; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java index 3a36eec66ac..b8f8a9fd841 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java @@ -32,6 +32,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; 
+import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -64,6 +68,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; @@ -709,4 +715,21 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( throw new NotImplementedException(); } + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException { + throw new NotImplementedException(); + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + throw new NotImplementedException(); + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + throw new NotImplementedException(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/RouterClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/RouterClientRMService.java index fd2c610c7fe..73cc18558d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/RouterClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/RouterClientRMService.java @@ -38,6 +38,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -70,6 +74,8 @@ import 
org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; @@ -403,6 +409,27 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( return pipeline.getRootInterceptor().updateApplicationTimeouts(request); } + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException { + RequestInterceptorChainWrapper pipeline = getInterceptorChain(); + return pipeline.getRootInterceptor().getResourceProfiles(request); + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + RequestInterceptorChainWrapper pipeline = getInterceptorChain(); + return pipeline.getRootInterceptor().getResourceProfile(request); + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + RequestInterceptorChainWrapper pipeline = getInterceptorChain(); + return pipeline.getRootInterceptor().getResourceTypeInfo(request); + } + private RequestInterceptorChainWrapper getInterceptorChain() throws IOException { String user = UserGroupInformation.getCurrentUser().getUserName(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java index c403bd5006c..cb1b529d8eb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/test/java/org/apache/hadoop/yarn/server/router/clientrm/PassThroughClientRequestInterceptor.java @@ -24,6 +24,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptRequest; import org.apache.hadoop.yarn.api.protocolrecords.FailApplicationAttemptResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceTypeInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; @@ -56,6 +60,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import 
org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetResourceProfileResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; @@ -264,4 +270,22 @@ public UpdateApplicationTimeoutsResponse updateApplicationTimeouts( throws YarnException, IOException { return getNextInterceptor().updateApplicationTimeouts(request); } + + @Override + public GetAllResourceProfilesResponse getResourceProfiles( + GetAllResourceProfilesRequest request) throws YarnException, IOException { + return getNextInterceptor().getResourceProfiles(request); + } + + @Override + public GetResourceProfileResponse getResourceProfile( + GetResourceProfileRequest request) throws YarnException, IOException { + return getNextInterceptor().getResourceProfile(request); + } + + @Override + public GetAllResourceTypeInfoResponse getResourceTypeInfo( + GetAllResourceTypeInfoRequest request) throws YarnException, IOException { + return getNextInterceptor().getResourceTypeInfo(request); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java index de282fd0631..3b1247a8c89 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java @@ -95,11 +95,14 @@ import org.apache.hadoop.yarn.server.timeline.TimelineStore; import org.apache.hadoop.yarn.server.timeline.recovery.MemoryTimelineStateStore; import org.apache.hadoop.yarn.server.timeline.recovery.TimelineStateStore; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.timeline.TimelineUtils; import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import com.google.common.annotations.VisibleForTesting; +import static org.apache.hadoop.yarn.server.resourcemanager.resource.TestResourceProfiles.TEST_CONF_RESET_RESOURCE_TYPES; + /** *

    * Embedded Yarn minicluster for testcases that need to interact with a cluster. @@ -251,6 +254,10 @@ public void serviceInit(Configuration conf) throws Exception { failoverTimeout = conf.getInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS); + if (conf.getBoolean(TEST_CONF_RESET_RESOURCE_TYPES, true)) { + ResourceUtils.resetResourceTypes(conf); + } + if (useRpc && !useFixedPorts) { throw new YarnRuntimeException("Invalid configuration!" + " Minicluster can use rpc only when configured to use fixed ports"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/TimelineSchemaCreator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/TimelineSchemaCreator.java index 210fd850460..c9f7cecdf56 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/TimelineSchemaCreator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/TimelineSchemaCreator.java @@ -276,7 +276,7 @@ private static void createAllSchemas(Configuration hbaseConf, createAllTables(hbaseConf, skipExisting); LOG.info("Successfully created HBase schema. "); } catch (IOException e) { - LOG.error("Error in creating hbase tables: " + e.getMessage()); + LOG.error("Error in creating hbase tables: ", e); exceptions.add(e); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md index f1d4535ac4b..6bb848900ee 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md @@ -171,6 +171,16 @@ Example: ``` + * Queue lifetime for applications + + The `CapacityScheduler` supports the following parameters related to the lifetime of an application: + +| Property | Description | +|:---- |:---- | +| `yarn.scheduler.capacity..maximum-application-lifetime` | Maximum lifetime, in seconds, of an application submitted to this queue. Any value less than or equal to zero is treated as disabled. This is a hard time limit for all applications in the queue: if a positive value is configured, any application submitted to this queue is killed once it exceeds the configured lifetime. Users can also specify a lifetime per application in the application submission context, but a user-specified lifetime is overridden if it exceeds the queue maximum. This is a point-in-time configuration. Note: configuring too low a value kills applications sooner. This feature is applicable only to leaf queues. | +| `yarn.scheduler.capacity.root..default-application-lifetime` | Default lifetime, in seconds, of an application submitted to this queue. Any value less than or equal to zero is treated as disabled. If the user does not submit an application with a lifetime value, this value is used. This is a point-in-time configuration. Note: the default lifetime cannot exceed the maximum lifetime. This feature is applicable only to leaf queues.| + + ###Setup for application priority.
Application priority works only along with FIFO ordering policy. Default ordering policy is FIFO. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md index 4de0a6ae489..23f41348370 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md @@ -17,6 +17,12 @@ Launching Applications Using Docker Containers +Notice +------ +This feature is experimental and is not complete. Enabling this feature and +running Docker containers in your cluster has security implications. +Please do a security analysis before enabling this feature. + Overview -------- @@ -71,68 +77,83 @@ request. For example: The following properties should be set in yarn-site.xml: ```xml - - yarn.nodemanager.container-executor.class - org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor - - This is the container executor setting that ensures that all applications - are started with the LinuxContainerExecutor. - - + + + yarn.nodemanager.container-executor.class + org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor + + This is the container executor setting that ensures that all applications + are started with the LinuxContainerExecutor. + + - - yarn.nodemanager.linux-container-executor.group - hadoop - - The POSIX group of the NodeManager. It should match the setting in - "container-executor.cfg". This configuration is required for validating - the secure access of the container-executor binary. - - + + yarn.nodemanager.linux-container-executor.group + hadoop + + The POSIX group of the NodeManager. It should match the setting in + "container-executor.cfg". This configuration is required for validating + the secure access of the container-executor binary. + + - - yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users - false - - Whether all applications should be run as the NodeManager process' owner. - When false, applications are launched instead as the application owner. - - + + yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users + false + + Whether all applications should be run as the NodeManager process' owner. + When false, applications are launched instead as the application owner. + + - - yarn.nodemanager.runtime.linux.docker.allowed-container-networks - host,none,bridge - - Optional. A comma-separated set of networks allowed when launching - containers. Valid values are determined by Docker networks available from - `docker network ls` - - + + yarn.nodemanager.runtime.linux.allowed-runtimes + default,docker + + Comma separated list of runtimes that are allowed when using + LinuxContainerExecutor. The allowed values are default, docker, and + javasandbox. + + - - The network used when launching Docker containers when no - network is specified in the request. This network must be one of the - (configurable) set of allowed container networks. - yarn.nodemanager.runtime.linux.docker.default-container-network - host - + + yarn.nodemanager.runtime.linux.docker.allowed-container-networks + host,none,bridge + + Optional. A comma-separated set of networks allowed when launching + containers. Valid values are determined by Docker networks available from + `docker network ls` + + - - yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed - false - - Optional. 
Whether applications are allowed to run in privileged containers. - - + + yarn.nodemanager.runtime.linux.docker.default-container-network + host + + The network used when launching Docker containers when no + network is specified in the request. This network must be one of the + (configurable) set of allowed container networks. + + - - yarn.nodemanager.runtime.linux.docker.privileged-containers.acl - - - Optional. A comma-separated list of users who are allowed to request - privileged contains if privileged containers are allowed. - - + + yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed + false + + Optional. Whether applications are allowed to run in privileged + containers. + + + + + yarn.nodemanager.runtime.linux.docker.privileged-containers.acl + + + Optional. A comma-separated list of users who are allowed to request + privileged contains if privileged containers are allowed. + + + ``` In addition, a container-executer.cfg file must exist and contain settings for diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceProfiles.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceProfiles.md new file mode 100644 index 00000000000..d422ab685f8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceProfiles.md @@ -0,0 +1,116 @@ + + +Hadoop: YARN Resource Profiles +============================== + +Overview +-------- +Resource profiles support in YARN helps to extend the YARN resource model to a more flexible model which makes it easier to add new countable resource­types. This solution also helps the users to submit jobs with ease to specify the resources they need. + +Resource model of YARN +----------------------- +Resource Manager will load a new configuration file named `resource-types.xml` to determine the set of resource ­types for which scheduling is enabled. Sample XML will look like below. + +```xml + + + yarn.resource-types + resource1, resource2 + + + + yarn.resource-types.resource1.units + G + + +``` + +Similarly, a new configuration file `node­-resources.xml` will also be loaded by Node Manager where the resource capabilities of a node can be specified. + +```xml + + + yarn.nodemanager.resource-type.resource1 + 5G + + + + yarn.nodemanager.resource-type.resource2 + 2m + + + +``` + +Node Manager will use these custom resource types and will register it's capability to Resource Manager. + +Resource Profiles +----------------- +As per the updated resource model of YARN, specifying and configuring additional resource types for container allocation request will become more cumbersome. With the support of resource profiles, such requests could be placed by using simple profile names which has defined meaning from YARN. + +Admin could specify `resource-profiles.json` which indicates various profiles supported. + +```json +{ + "minimum": { + "memory-mb" : 1024, + "vcores" : 1 + }, + "default" : { + "memory-mb" : 2048, + "vcores" : 2 + }, + "maximum" : { + "memory-mb": 4096, + "vcores" : 4 + }, + "http" : { + "memory-mb" : 2048, + "vcores" : 2 + } +} +``` +This indicates that profile named "minimum", "default", "maximum" and "http" is supported. Clients could easily use this name instead of specifying each resource details. 
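To make the profile lookup concrete: the `getResourceProfiles`/`getResourceProfile` calls wired through the Router earlier in this patch expose these profiles over `ApplicationClientProtocol`. Below is a minimal, illustrative sketch rather than code from this patch; the `ClientRMProxy` wiring, the `ListResourceProfiles` class name, and the reliance on the response record's `toString()` output are assumptions made only for the example.

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetAllResourceProfilesResponse;
import org.apache.hadoop.yarn.client.ClientRMProxy;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.Records;

public class ListResourceProfiles {
  public static void main(String[] args) throws IOException, YarnException {
    Configuration conf = new YarnConfiguration();

    // Proxy to the client-facing RM (or Router) endpoint.
    ApplicationClientProtocol client =
        ClientRMProxy.createRMProxy(conf, ApplicationClientProtocol.class);

    // Ask the RM for every profile it loaded from resource-profiles.json.
    GetAllResourceProfilesResponse profiles = client.getResourceProfiles(
        Records.newRecord(GetAllResourceProfilesRequest.class));

    // Printing the record is enough for a quick check of what is available.
    System.out.println("Resource profiles advertised by the RM: " + profiles);
  }
}
```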
+ +Configurations +------------- + +*In `yarn-site.xml`* + +| Configuration Property | Description | +|:---- |:---- | +| `yarn.resourcemanager.resource-profiles.enabled` | *(Required)* Indicates on the server side whether resource profiles support is enabled. Defaults to `false`. | + +Please note that `resource-types.xml` and `node-resources.xml` also need to be placed in the conf directory if new resources are to be added to YARN. + +*In `resource-types.xml`* + +| Configuration Property | Value | Description | +|:---- |:---- |:---- | +| `yarn.resource-types` | resource1 | Custom resource | +| `yarn.resource-types.resource1.units` | G | Default unit for resource1 type | + +*In `node-resources.xml`* + +| Configuration Property | Value | Description | +|:---- |:---- |:---- | +| `yarn.nodemanager.resource-type.resource1` | 5G | Resource capability for resource named 'resource1'. | + +Use it +-------- +Distributed Shell supports this feature; users can specify a resource profile name supported by YARN when running a DS application. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServiceV2.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServiceV2.md index 2de305d1cd3..7c51ce05f5b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServiceV2.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/TimelineServiceV2.md @@ -415,13 +415,13 @@ Service v.2. Note that currently you need to be on the cluster to be able to wri Service. For example, an application master or code in the container can write to the Timeline Service, while an off-cluster MapReduce job submitter cannot. -After creating the timeline v2 client, user also needs to set the timeline collector address for the application. If `AMRMClient` is used then by registering the timeline client by calling `AMRMClient#registerTimelineV2Client` is sufficient. +After creating the timeline v2 client, the user also needs to set the timeline collector info, which contains the collector address and the collector token (only in secure mode), for the application. If `AMRMClient` is used, registering the timeline client by calling `AMRMClient#registerTimelineV2Client` is sufficient. amRMClient.registerTimelineV2Client(timelineClient); Else address needs to be retrieved from the AM allocate response and need to be set in timeline client explicitly. - timelineClient.setTimelineServiceAddress(response.getCollectorAddr()); + timelineClient.setTimelineCollectorInfo(response.getCollectorInfo()); You can create and publish your own entities, events, and metrics as with previous versions.
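As a concrete companion to the registration flow just described, here is a hedged, minimal AM-side sketch rather than code from this patch: the placeholder `ApplicationId`, the `MY_APPLICATION` entity type, and the service init/start/stop boilerplate are illustrative assumptions. An AM that drives its own allocate loop would instead call `timelineClient.setTimelineCollectorInfo(response.getCollectorInfo())` as shown above.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.TimelineV2Client;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class TimelineV2PublisherSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new YarnConfiguration();

    // Placeholder id; a real AM derives this from its own container id.
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);

    AMRMClient<AMRMClient.ContainerRequest> amRMClient = AMRMClient.createAMRMClient();
    amRMClient.init(conf);
    amRMClient.start();

    // Register the timeline client with AMRMClient so the collector info
    // (address, plus token in secure mode) is picked up from allocate responses.
    TimelineV2Client timelineClient = TimelineV2Client.createTimelineClient(appId);
    timelineClient.init(conf);
    timelineClient.start();
    amRMClient.registerTimelineV2Client(timelineClient);

    // Publish a simple entity. In a real AM this should happen only after the
    // collector info has been received, i.e. after registering with the RM and
    // heartbeating at least once via allocate().
    TimelineEntity entity = new TimelineEntity();
    entity.setType("MY_APPLICATION"); // illustrative entity type
    entity.setId(appId.toString());
    entity.setCreatedTime(System.currentTimeMillis());
    timelineClient.putEntitiesAsync(entity);

    timelineClient.stop();
    amRMClient.stop();
  }
}
```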
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/base-chart-component.js b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/base-chart-component.js index aa418938a7e..26aa2b05105 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/base-chart-component.js +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/base-chart-component.js @@ -108,7 +108,7 @@ export default Ember.Component.extend({ data = d.data; } - this.tooltip.style("opacity", 0.9); + this.tooltip.style("opacity", 0.7); var value = data.value; if (this.get("type") === "memory") { value = Converter.memoryToSimpliedUnit(value); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/nodes-heatmap.js b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/nodes-heatmap.js index a1df4808039..84ff59ea683 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/nodes-heatmap.js +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/components/nodes-heatmap.js @@ -33,8 +33,9 @@ export default BaseChartComponent.extend({ totalContainers: 0, bindTP: function(element, cell) { + var currentToolTip = this.tooltip; element.on("mouseover", function() { - this.tooltip + currentToolTip .style("left", (d3.event.pageX) + "px") .style("top", (d3.event.pageY - 28) + "px"); cell.style("opacity", 1.0); @@ -42,14 +43,20 @@ export default BaseChartComponent.extend({ .on("mousemove", function() { // Handle pie chart case var text = cell.attr("tooltiptext"); - - this.tooltip.style("opacity", 0.9); - this.tooltip.html(text) - .style("left", (d3.event.pageX) + "px") - .style("top", (d3.event.pageY - 28) + "px"); - }.bind(this)) + currentToolTip + .style("background", "black") + .style("opacity", 0.7); + currentToolTip + .html(text) + .style('font-size', '12px') + .style('color', 'white') + .style('font-weight', '400'); + currentToolTip + .style("left", (d3.event.pageX) + "px") + .style("top", (d3.event.pageY - 28) + "px"); + }.bind(this)) .on("mouseout", function() { - this.tooltip.style("opacity", 0); + currentToolTip.style("opacity", 0); cell.style("opacity", 0.8); }.bind(this)); }, @@ -115,7 +122,10 @@ export default BaseChartComponent.extend({ var xOffset = layout.margin; var yOffset = layout.margin * 3; - var colorFunc = d3.interpolate(d3.rgb("#bdddf5"), d3.rgb("#0f3957")); + var gradientStartColor = "#2ca02c"; + var gradientEndColor = "#ffb014"; + + var colorFunc = d3.interpolateRgb(d3.rgb(gradientStartColor), d3.rgb(gradientEndColor)); var sampleXOffset = (layout.x2 - layout.x1) / 2 - 2.5 * this.SAMPLE_CELL_WIDTH - 2 * this.CELL_MARGIN; @@ -128,7 +138,7 @@ export default BaseChartComponent.extend({ var rect = g.append("rect") .attr("x", sampleXOffset) .attr("y", sampleYOffset) - .attr("fill", this.selectedCategory === i ? "#2ca02c" : colorFunc(ratio)) + .attr("fill", this.selectedCategory === i ? 
"#2c7bb6" : colorFunc(ratio)) .attr("width", this.SAMPLE_CELL_WIDTH) .attr("height", this.SAMPLE_HEIGHT) .attr("class", "hyperlink"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/styles/app.css b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/styles/app.css index 38e25e4ed96..f48c1868060 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/styles/app.css +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/src/main/webapp/app/styles/app.css @@ -58,8 +58,7 @@ text.queue { } text.heatmap-cell { - font: 14px sans-serif; - font-weight: bold; + font-size: 14px; text-anchor: middle; fill: Azure; text-align: center; @@ -83,11 +82,13 @@ text.heatmap-cell-notselected { text-anchor: middle; fill: Silver; text-align: center; + } text.heatmap-rack { - font: 20px sans-serif; - fill: DimGray; + font-size: 18px; + font-weight: 400; + fill: #4b5052; } path.queue { @@ -111,9 +112,8 @@ line.chart { */ text.chart-title { font-size: 30px; - font-family: sans-serif; text-anchor: middle; - fill: Gray; + fill: #4b5052; } text.donut-highlight-text, text.donut-highlight-sub { @@ -143,9 +143,10 @@ text.bar-chart-text { div.tooltip { position: absolute; text-align: center; - padding: 2px; - font: 24px sans-serif; - background: lightsteelblue; + padding: 10px; + font-size: 16px; + background: black; + color: white; border: 0px; border-radius: 8px; pointer-events: none; diff --git a/pom.xml b/pom.xml index efd75782ea2..08e858514f1 100644 --- a/pom.xml +++ b/pom.xml @@ -96,7 +96,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 1.5 1.7 2.4 - 2.10 + 3.0.1 3.0.0-M1 2.10.4 1.5