Merge -c 1514135 from trunk to branch-2 to fix YARN-1056. Remove dual use of string 'resourcemanager' in yarn.resourcemanager.connect.{max.wait.secs|retry_interval.secs}. Contributed by Karthik Kambatla.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1514136 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
777a48855b
commit
77a60701c1
|
@ -860,6 +860,10 @@ Release 2.1.0-beta - 2013-08-06
|
||||||
|
|
||||||
YARN-1043. Push all metrics consistently. (Jian He via acmurthy)
|
YARN-1043. Push all metrics consistently. (Jian He via acmurthy)
|
||||||
|
|
||||||
|
YARN-1056. Remove dual use of string 'resourcemanager' in
|
||||||
|
yarn.resourcemanager.connect.{max.wait.secs|retry_interval.secs}
|
||||||
|
(Karthik Kambatla via acmurthy)
|
||||||
|
|
||||||
Release 2.0.5-alpha - 06/06/2013
|
Release 2.0.5-alpha - 06/06/2013
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -259,7 +259,7 @@ public class YarnConfiguration extends Configuration {
|
||||||
|
|
||||||
/** URI for FileSystemRMStateStore */
|
/** URI for FileSystemRMStateStore */
|
||||||
public static final String FS_RM_STATE_STORE_URI =
|
public static final String FS_RM_STATE_STORE_URI =
|
||||||
RM_PREFIX + "fs.rm-state-store.uri";
|
RM_PREFIX + "fs.state-store.uri";
|
||||||
|
|
||||||
/** The maximum number of completed applications RM keeps. */
|
/** The maximum number of completed applications RM keeps. */
|
||||||
public static final String RM_MAX_COMPLETED_APPLICATIONS =
|
public static final String RM_MAX_COMPLETED_APPLICATIONS =
|
||||||
|
@ -655,19 +655,17 @@ public class YarnConfiguration extends Configuration {
|
||||||
public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS =
|
public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS =
|
||||||
2000;
|
2000;
|
||||||
|
|
||||||
/** Max time to wait to establish a connection to RM
|
/** Max time to wait to establish a connection to RM */
|
||||||
*/
|
public static final String RESOURCEMANAGER_CONNECT_MAX_WAIT_MS =
|
||||||
public static final String RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS =
|
RM_PREFIX + "connect.max-wait.ms";
|
||||||
RM_PREFIX + "resourcemanager.connect.max.wait.secs";
|
public static final int DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS =
|
||||||
public static final int DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS =
|
15 * 60 * 1000;
|
||||||
15*60;
|
|
||||||
|
|
||||||
/** Time interval between each attempt to connect to RM
|
/** Time interval between each attempt to connect to RM */
|
||||||
*/
|
public static final String RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS =
|
||||||
public static final String RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS =
|
RM_PREFIX + "connect.retry-interval.ms";
|
||||||
RM_PREFIX + "resourcemanager.connect.retry_interval.secs";
|
public static final long DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS
|
||||||
public static final long DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS
|
= 30 * 1000;
|
||||||
= 30;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CLASSPATH for YARN applications. A comma-separated list of CLASSPATH
|
* CLASSPATH for YARN applications. A comma-separated list of CLASSPATH
|
||||||
|
|
|
@ -35,14 +35,10 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.io.retry.RetryPolicies;
|
import org.apache.hadoop.io.retry.RetryPolicies;
|
||||||
import org.apache.hadoop.io.retry.RetryPolicy;
|
import org.apache.hadoop.io.retry.RetryPolicy;
|
||||||
import org.apache.hadoop.io.retry.RetryProxy;
|
import org.apache.hadoop.io.retry.RetryProxy;
|
||||||
import org.apache.hadoop.security.SecurityUtil;
|
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.security.token.Token;
|
|
||||||
import org.apache.hadoop.security.token.TokenIdentifier;
|
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
||||||
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
|
@ -79,38 +75,36 @@ public class RMProxy<T> {
|
||||||
public static RetryPolicy createRetryPolicy(Configuration conf) {
|
public static RetryPolicy createRetryPolicy(Configuration conf) {
|
||||||
long rmConnectWaitMS =
|
long rmConnectWaitMS =
|
||||||
conf.getInt(
|
conf.getInt(
|
||||||
YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS,
|
YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS,
|
||||||
YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS)
|
YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS);
|
||||||
* 1000;
|
|
||||||
long rmConnectionRetryIntervalMS =
|
long rmConnectionRetryIntervalMS =
|
||||||
conf.getLong(
|
conf.getLong(
|
||||||
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS,
|
||||||
YarnConfiguration
|
YarnConfiguration
|
||||||
.DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS)
|
.DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS);
|
||||||
* 1000;
|
|
||||||
|
|
||||||
if (rmConnectionRetryIntervalMS < 0) {
|
if (rmConnectionRetryIntervalMS < 0) {
|
||||||
throw new YarnRuntimeException("Invalid Configuration. " +
|
throw new YarnRuntimeException("Invalid Configuration. " +
|
||||||
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS +
|
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS +
|
||||||
" should not be negative.");
|
" should not be negative.");
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean waitForEver = (rmConnectWaitMS == -1000);
|
boolean waitForEver = (rmConnectWaitMS == -1);
|
||||||
|
|
||||||
if (waitForEver) {
|
if (waitForEver) {
|
||||||
return RetryPolicies.RETRY_FOREVER;
|
return RetryPolicies.RETRY_FOREVER;
|
||||||
} else {
|
} else {
|
||||||
if (rmConnectWaitMS < 0) {
|
if (rmConnectWaitMS < 0) {
|
||||||
throw new YarnRuntimeException("Invalid Configuration. "
|
throw new YarnRuntimeException("Invalid Configuration. "
|
||||||
+ YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS
|
+ YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS
|
||||||
+ " can be -1, but can not be other negative numbers");
|
+ " can be -1, but can not be other negative numbers");
|
||||||
}
|
}
|
||||||
|
|
||||||
// try connect once
|
// try connect once
|
||||||
if (rmConnectWaitMS < rmConnectionRetryIntervalMS) {
|
if (rmConnectWaitMS < rmConnectionRetryIntervalMS) {
|
||||||
LOG.warn(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS
|
LOG.warn(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS
|
||||||
+ " is smaller than "
|
+ " is smaller than "
|
||||||
+ YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS
|
+ YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS
|
||||||
+ ". Only try connect once.");
|
+ ". Only try connect once.");
|
||||||
rmConnectWaitMS = 0;
|
rmConnectWaitMS = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -140,6 +140,20 @@
|
||||||
<value>1000</value>
|
<value>1000</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>Maximum time to wait to establish connection to
|
||||||
|
ResourceManager.</description>
|
||||||
|
<name>yarn.resourcemanager.connect.max-wait.ms</name>
|
||||||
|
<value>900000</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>How often to try connecting to the
|
||||||
|
ResourceManager.</description>
|
||||||
|
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
|
||||||
|
<value>30000</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>The maximum number of application attempts. It's a global
|
<description>The maximum number of application attempts. It's a global
|
||||||
setting for all application masters. Each application master can specify
|
setting for all application masters. Each application master can specify
|
||||||
|
@ -249,7 +263,7 @@
|
||||||
RM state will be stored. This must be supplied when using
|
RM state will be stored. This must be supplied when using
|
||||||
org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
|
org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
|
||||||
as the value for yarn.resourcemanager.store.class</description>
|
as the value for yarn.resourcemanager.store.class</description>
|
||||||
<name>yarn.resourcemanager.fs.rm-state-store.uri</name>
|
<name>yarn.resourcemanager.fs.state-store.uri</name>
|
||||||
<value>${hadoop.tmp.dir}/yarn/system/rmstore</value>
|
<value>${hadoop.tmp.dir}/yarn/system/rmstore</value>
|
||||||
<!--value>hdfs://localhost:9000/rmstore</value-->
|
<!--value>hdfs://localhost:9000/rmstore</value-->
|
||||||
</property>
|
</property>
|
||||||
|
|
|
@ -957,15 +957,14 @@ public class TestNodeStatusUpdater {
|
||||||
@Test (timeout = 150000)
|
@Test (timeout = 150000)
|
||||||
public void testNMConnectionToRM() throws Exception {
|
public void testNMConnectionToRM() throws Exception {
|
||||||
final long delta = 50000;
|
final long delta = 50000;
|
||||||
final long connectionWaitSecs = 5;
|
final long connectionWaitMs = 5000;
|
||||||
final long connectionRetryIntervalSecs = 1;
|
final long connectionRetryIntervalMs = 1000;
|
||||||
//Waiting for rmStartIntervalMS, RM will be started
|
//Waiting for rmStartIntervalMS, RM will be started
|
||||||
final long rmStartIntervalMS = 2*1000;
|
final long rmStartIntervalMS = 2*1000;
|
||||||
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS,
|
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS,
|
||||||
connectionWaitSecs);
|
connectionWaitMs);
|
||||||
conf.setLong(YarnConfiguration
|
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS,
|
||||||
.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
connectionRetryIntervalMs);
|
||||||
connectionRetryIntervalSecs);
|
|
||||||
|
|
||||||
//Test NM try to connect to RM Several times, but finally fail
|
//Test NM try to connect to RM Several times, but finally fail
|
||||||
NodeManagerWithCustomNodeStatusUpdater nmWithUpdater;
|
NodeManagerWithCustomNodeStatusUpdater nmWithUpdater;
|
||||||
|
@ -987,15 +986,15 @@ public class TestNodeStatusUpdater {
|
||||||
} catch(Exception e) {
|
} catch(Exception e) {
|
||||||
long t = System.currentTimeMillis();
|
long t = System.currentTimeMillis();
|
||||||
long duration = t - waitStartTime;
|
long duration = t - waitStartTime;
|
||||||
boolean waitTimeValid = (duration >= connectionWaitSecs * 1000)
|
boolean waitTimeValid = (duration >= connectionWaitMs)
|
||||||
&& (duration < (connectionWaitSecs * 1000 + delta));
|
&& (duration < (connectionWaitMs + delta));
|
||||||
if(!waitTimeValid) {
|
if(!waitTimeValid) {
|
||||||
//either the exception was too early, or it had a different cause.
|
//either the exception was too early, or it had a different cause.
|
||||||
//reject with the inner stack trace
|
//reject with the inner stack trace
|
||||||
throw new Exception("NM should have tried re-connecting to RM during " +
|
throw new Exception("NM should have tried re-connecting to RM during " +
|
||||||
"period of at least " + connectionWaitSecs + " seconds, but " +
|
"period of at least " + connectionWaitMs + " ms, but " +
|
||||||
"stopped retrying within " + (connectionWaitSecs + delta/1000) +
|
"stopped retrying within " + (connectionWaitMs + delta) +
|
||||||
" seconds: " + e, e);
|
" ms: " + e, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1149,14 +1148,14 @@ public class TestNodeStatusUpdater {
|
||||||
@Test(timeout = 200000)
|
@Test(timeout = 200000)
|
||||||
public void testNodeStatusUpdaterRetryAndNMShutdown()
|
public void testNodeStatusUpdaterRetryAndNMShutdown()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
final long connectionWaitSecs = 1;
|
final long connectionWaitSecs = 1000;
|
||||||
final long connectionRetryIntervalSecs = 1;
|
final long connectionRetryIntervalMs = 1000;
|
||||||
YarnConfiguration conf = createNMConfig();
|
YarnConfiguration conf = createNMConfig();
|
||||||
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_SECS,
|
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS,
|
||||||
connectionWaitSecs);
|
connectionWaitSecs);
|
||||||
conf.setLong(YarnConfiguration
|
conf.setLong(YarnConfiguration
|
||||||
.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS,
|
.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS,
|
||||||
connectionRetryIntervalSecs);
|
connectionRetryIntervalMs);
|
||||||
conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, 5000);
|
conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, 5000);
|
||||||
CyclicBarrier syncBarrier = new CyclicBarrier(2);
|
CyclicBarrier syncBarrier = new CyclicBarrier(2);
|
||||||
nm = new MyNodeManager2(syncBarrier, conf);
|
nm = new MyNodeManager2(syncBarrier, conf);
|
||||||
|
|
Loading…
Reference in New Issue