YARN-2054. Better defaults for YARN ZK configs for retries and retry-interval when HA is enabled. (kasha)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1598632 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
452b37a2d7
commit
4c4ebe2353
|
@ -102,6 +102,9 @@ Release 2.5.0 - UNRELEASED
|
||||||
YARN-596. Use scheduling policies throughout the queue hierarchy to decide
|
YARN-596. Use scheduling policies throughout the queue hierarchy to decide
|
||||||
which containers to preempt (Wei Yan via Sandy Ryza)
|
which containers to preempt (Wei Yan via Sandy Ryza)
|
||||||
|
|
||||||
|
YARN-2054. Better defaults for YARN ZK configs for retries and retry-interval
|
||||||
|
when HA is enabled. (kasha)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -324,11 +324,11 @@ public class YarnConfiguration extends Configuration {
|
||||||
public static final String RM_ZK_ADDRESS = RM_ZK_PREFIX + "address";
|
public static final String RM_ZK_ADDRESS = RM_ZK_PREFIX + "address";
|
||||||
|
|
||||||
public static final String RM_ZK_NUM_RETRIES = RM_ZK_PREFIX + "num-retries";
|
public static final String RM_ZK_NUM_RETRIES = RM_ZK_PREFIX + "num-retries";
|
||||||
public static final int DEFAULT_ZK_RM_NUM_RETRIES = 500;
|
public static final int DEFAULT_ZK_RM_NUM_RETRIES = 1000;
|
||||||
|
|
||||||
public static final String RM_ZK_RETRY_INTERVAL_MS =
|
public static final String RM_ZK_RETRY_INTERVAL_MS =
|
||||||
RM_ZK_PREFIX + "retry-interval-ms";
|
RM_ZK_PREFIX + "retry-interval-ms";
|
||||||
public static final long DEFAULT_RM_ZK_RETRY_INTERVAL_MS = 2000;
|
public static final long DEFAULT_RM_ZK_RETRY_INTERVAL_MS = 1000;
|
||||||
|
|
||||||
public static final String RM_ZK_TIMEOUT_MS = RM_ZK_PREFIX + "timeout-ms";
|
public static final String RM_ZK_TIMEOUT_MS = RM_ZK_PREFIX + "timeout-ms";
|
||||||
public static final int DEFAULT_RM_ZK_TIMEOUT_MS = 10000;
|
public static final int DEFAULT_RM_ZK_TIMEOUT_MS = 10000;
|
||||||
|
|
|
@ -309,14 +309,17 @@
|
||||||
<property>
|
<property>
|
||||||
<description>Number of times RM tries to connect to ZooKeeper.</description>
|
<description>Number of times RM tries to connect to ZooKeeper.</description>
|
||||||
<name>yarn.resourcemanager.zk-num-retries</name>
|
<name>yarn.resourcemanager.zk-num-retries</name>
|
||||||
<value>500</value>
|
<value>1000</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>Retry interval in milliseconds when connecting to ZooKeeper.
|
<description>Retry interval in milliseconds when connecting to ZooKeeper.
|
||||||
|
When HA is enabled, the value here is NOT used. It is generated
|
||||||
|
automatically from yarn.resourcemanager.zk-timeout-ms and
|
||||||
|
yarn.resourcemanager.zk-num-retries.
|
||||||
</description>
|
</description>
|
||||||
<name>yarn.resourcemanager.zk-retry-interval-ms</name>
|
<name>yarn.resourcemanager.zk-retry-interval-ms</name>
|
||||||
<value>2000</value>
|
<value>1000</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
|
|
|
@ -90,7 +90,9 @@ public class ZKRMStateStore extends RMStateStore {
|
||||||
|
|
||||||
private String zkHostPort = null;
|
private String zkHostPort = null;
|
||||||
private int zkSessionTimeout;
|
private int zkSessionTimeout;
|
||||||
private long zkRetryInterval;
|
|
||||||
|
@VisibleForTesting
|
||||||
|
long zkRetryInterval;
|
||||||
private List<ACL> zkAcl;
|
private List<ACL> zkAcl;
|
||||||
private List<ZKUtil.ZKAuthInfo> zkAuths;
|
private List<ZKUtil.ZKAuthInfo> zkAuths;
|
||||||
|
|
||||||
|
@ -199,9 +201,14 @@ public class ZKRMStateStore extends RMStateStore {
|
||||||
zkSessionTimeout =
|
zkSessionTimeout =
|
||||||
conf.getInt(YarnConfiguration.RM_ZK_TIMEOUT_MS,
|
conf.getInt(YarnConfiguration.RM_ZK_TIMEOUT_MS,
|
||||||
YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS);
|
YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS);
|
||||||
zkRetryInterval =
|
|
||||||
conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
|
if (HAUtil.isHAEnabled(conf)) {
|
||||||
YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
|
zkRetryInterval = zkSessionTimeout / numRetries;
|
||||||
|
} else {
|
||||||
|
zkRetryInterval =
|
||||||
|
conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
|
||||||
|
}
|
||||||
|
|
||||||
zkAcl = RMZKUtils.getZKAcls(conf);
|
zkAcl = RMZKUtils.getZKAcls(conf);
|
||||||
zkAuths = RMZKUtils.getZKAuths(conf);
|
zkAuths = RMZKUtils.getZKAuths(conf);
|
||||||
|
|
|
@ -41,6 +41,7 @@ import java.security.NoSuchAlgorithmException;
|
||||||
import java.util.concurrent.CyclicBarrier;
|
import java.util.concurrent.CyclicBarrier;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
@ -203,7 +204,7 @@ public class TestZKRMStateStoreZKClientConnections extends
|
||||||
LOG.error(error, e);
|
LOG.error(error, e);
|
||||||
fail(error);
|
fail(error);
|
||||||
}
|
}
|
||||||
Assert.assertEquals("newBytes", new String(ret));
|
assertEquals("newBytes", new String(ret));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 20000)
|
@Test(timeout = 20000)
|
||||||
|
@ -232,7 +233,7 @@ public class TestZKRMStateStoreZKClientConnections extends
|
||||||
|
|
||||||
try {
|
try {
|
||||||
byte[] ret = store.getDataWithRetries(path, false);
|
byte[] ret = store.getDataWithRetries(path, false);
|
||||||
Assert.assertEquals("bytes", new String(ret));
|
assertEquals("bytes", new String(ret));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
String error = "New session creation failed";
|
String error = "New session creation failed";
|
||||||
LOG.error(error, e);
|
LOG.error(error, e);
|
||||||
|
@ -281,4 +282,24 @@ public class TestZKRMStateStoreZKClientConnections extends
|
||||||
|
|
||||||
zkClientTester.getRMStateStore(conf);
|
zkClientTester.getRMStateStore(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testZKRetryInterval() throws Exception {
|
||||||
|
TestZKClient zkClientTester = new TestZKClient();
|
||||||
|
YarnConfiguration conf = new YarnConfiguration();
|
||||||
|
|
||||||
|
ZKRMStateStore store =
|
||||||
|
(ZKRMStateStore) zkClientTester.getRMStateStore(conf);
|
||||||
|
assertEquals(YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS,
|
||||||
|
store.zkRetryInterval);
|
||||||
|
store.stop();
|
||||||
|
|
||||||
|
conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
|
||||||
|
store =
|
||||||
|
(ZKRMStateStore) zkClientTester.getRMStateStore(conf);
|
||||||
|
assertEquals(YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS /
|
||||||
|
YarnConfiguration.DEFAULT_ZK_RM_NUM_RETRIES,
|
||||||
|
store.zkRetryInterval);
|
||||||
|
store.stop();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue