From 77a60701c1828eca51538016b3d8fee6652f8d90 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Thu, 15 Aug 2013 02:36:58 +0000 Subject: [PATCH] Merge -c 1514135 from trunk to branch-2 to fix YARN-1056. Remove dual use of string 'resourcemanager' in yarn.resourcemanager.connect.{max.wait.secs|retry_interval.secs}. Contributed by Karthik Kambatla. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1514136 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 4 +++ .../hadoop/yarn/conf/YarnConfiguration.java | 24 +++++++------- .../apache/hadoop/yarn/client/RMProxy.java | 24 +++++--------- .../src/main/resources/yarn-default.xml | 16 ++++++++- .../nodemanager/TestNodeStatusUpdater.java | 33 +++++++++---------- 5 files changed, 55 insertions(+), 46 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5e3a506aca0..936051c263d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -860,6 +860,10 @@ Release 2.1.0-beta - 2013-08-06 YARN-1043. Push all metrics consistently. (Jian He via acmurthy) + YARN-1056. Remove dual use of string 'resourcemanager' in + yarn.resourcemanager.connect.{max.wait.secs|retry_interval.secs} + (Karthik Kambatla via acmurthy) + Release 2.0.5-alpha - 06/06/2013 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 065fb6344c6..ec9eb19c4f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -259,7 +259,7 @@ public class YarnConfiguration extends Configuration { /** URI for FileSystemRMStateStore */ public static final String FS_RM_STATE_STORE_URI = - RM_PREFIX + "fs.rm-state-store.uri"; + RM_PREFIX + "fs.state-store.uri"; /** The maximum number of completed applications RM keeps. */ public static final String RM_MAX_COMPLETED_APPLICATIONS = @@ -655,19 +655,17 @@ public class YarnConfiguration extends Configuration { public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS = 2000; - /** Max time to wait to establish a connection to RM - */ - public static final String RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS = - RM_PREFIX + "resourcemanager.connect.max.wait.secs"; - public static final int DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS = - 15*60; + /** Max time to wait to establish a connection to RM */ + public static final String RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = + RM_PREFIX + "connect.max-wait.ms"; + public static final int DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS = + 15 * 60 * 1000; - /** Time interval between each attempt to connect to RM - */ - public static final String RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS = - RM_PREFIX + "resourcemanager.connect.retry_interval.secs"; - public static final long DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS - = 30; + /** Time interval between each attempt to connect to RM */ + public static final String RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS = + RM_PREFIX + "connect.retry-interval.ms"; + public static final long DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS + = 30 * 1000; /** * CLASSPATH for YARN applications. A comma-separated list of CLASSPATH diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java index 3b166a8806c..5fff760eb2d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java @@ -35,14 +35,10 @@ import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryProxy; -import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.ipc.YarnRPC; -import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import com.google.common.annotations.VisibleForTesting; @@ -79,38 +75,36 @@ public T run() { public static RetryPolicy createRetryPolicy(Configuration conf) { long rmConnectWaitMS = conf.getInt( - YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS, - YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS) - * 1000; + YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, + YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS); long rmConnectionRetryIntervalMS = conf.getLong( - YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS, + YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, YarnConfiguration - .DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS) - * 1000; + .DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS); if (rmConnectionRetryIntervalMS < 0) { throw new YarnRuntimeException("Invalid Configuration. " + - YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS + + YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS + " should not be negative."); } - boolean waitForEver = (rmConnectWaitMS == -1000); + boolean waitForEver = (rmConnectWaitMS == -1); if (waitForEver) { return RetryPolicies.RETRY_FOREVER; } else { if (rmConnectWaitMS < 0) { throw new YarnRuntimeException("Invalid Configuration. " - + YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS + + YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS + " can be -1, but can not be other negative numbers"); } // try connect once if (rmConnectWaitMS < rmConnectionRetryIntervalMS) { - LOG.warn(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS + LOG.warn(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS + " is smaller than " - + YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS + + YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS + ". Only try connect once."); rmConnectWaitMS = 0; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index b6753bc4adc..ab8d50aab10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -140,6 +140,20 @@ 1000 + + Maximum time to wait to establish connection to + ResourceManager. + yarn.resourcemanager.connect.max-wait.ms + 900000 + + + + How often to try connecting to the + ResourceManager. + yarn.resourcemanager.connect.retry-interval.ms + 30000 + + The maximum number of application attempts. It's a global setting for all application masters. Each application master can specify @@ -249,7 +263,7 @@ RM state will be stored. This must be supplied when using org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore as the value for yarn.resourcemanager.store.class - yarn.resourcemanager.fs.rm-state-store.uri + yarn.resourcemanager.fs.state-store.uri ${hadoop.tmp.dir}/yarn/system/rmstore diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 78ab13ea835..d2119a75072 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -957,15 +957,14 @@ protected NodeStatusUpdater createUpdater(Context context, @Test (timeout = 150000) public void testNMConnectionToRM() throws Exception { final long delta = 50000; - final long connectionWaitSecs = 5; - final long connectionRetryIntervalSecs = 1; + final long connectionWaitMs = 5000; + final long connectionRetryIntervalMs = 1000; //Waiting for rmStartIntervalMS, RM will be started final long rmStartIntervalMS = 2*1000; - conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS, - connectionWaitSecs); - conf.setLong(YarnConfiguration - .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS, - connectionRetryIntervalSecs); + conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, + connectionWaitMs); + conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + connectionRetryIntervalMs); //Test NM try to connect to RM Several times, but finally fail NodeManagerWithCustomNodeStatusUpdater nmWithUpdater; @@ -987,15 +986,15 @@ protected NodeStatusUpdater createUpdater(Context context, } catch(Exception e) { long t = System.currentTimeMillis(); long duration = t - waitStartTime; - boolean waitTimeValid = (duration >= connectionWaitSecs * 1000) - && (duration < (connectionWaitSecs * 1000 + delta)); + boolean waitTimeValid = (duration >= connectionWaitMs) + && (duration < (connectionWaitMs + delta)); if(!waitTimeValid) { //either the exception was too early, or it had a different cause. //reject with the inner stack trace throw new Exception("NM should have tried re-connecting to RM during " + - "period of at least " + connectionWaitSecs + " seconds, but " + - "stopped retrying within " + (connectionWaitSecs + delta/1000) + - " seconds: " + e, e); + "period of at least " + connectionWaitMs + " ms, but " + + "stopped retrying within " + (connectionWaitMs + delta) + + " ms: " + e, e); } } @@ -1149,14 +1148,14 @@ protected NMContext createNMContext( @Test(timeout = 200000) public void testNodeStatusUpdaterRetryAndNMShutdown() throws Exception { - final long connectionWaitSecs = 1; - final long connectionRetryIntervalSecs = 1; + final long connectionWaitSecs = 1000; + final long connectionRetryIntervalMs = 1000; YarnConfiguration conf = createNMConfig(); - conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS, + conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, connectionWaitSecs); conf.setLong(YarnConfiguration - .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS, - connectionRetryIntervalSecs); + .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + connectionRetryIntervalMs); conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, 5000); CyclicBarrier syncBarrier = new CyclicBarrier(2); nm = new MyNodeManager2(syncBarrier, conf);