YARN-3753. RM failed to come up with "java.io.IOException: Wait for ZKClient creation timed out". Contributed by Jian He

This commit is contained in:
Xuan 2015-06-02 10:28:14 -07:00
parent 1ff3f16ed0
commit b34825b0cb
3 changed files with 25 additions and 11 deletions

View File

@ -142,6 +142,9 @@ Release 2.7.1 - UNRELEASED
YARN-3725. App submission via REST API is broken in secure mode due to
Timeline DT service address is empty. (Zhijie Shen via wangda)
YARN-3753. RM failed to come up with "java.io.IOException: Wait for
ZKClient creation timed out”. (Jian He via xgong)
Release 2.7.0 - 2015-04-20
INCOMPATIBLE CHANGES

View File

@ -104,6 +104,8 @@ public class ZKRMStateStore extends RMStateStore {
private String zkHostPort = null;
private int zkSessionTimeout;
// wait time for zkClient to re-establish connection with zk-server.
private long zkResyncWaitTime;
@VisibleForTesting
long zkRetryInterval;
@ -234,6 +236,7 @@ public class ZKRMStateStore extends RMStateStore {
conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
}
zkResyncWaitTime = zkRetryInterval * numRetries;
zkAcl = RMZKUtils.getZKAcls(conf);
zkAuths = RMZKUtils.getZKAuths(conf);
@ -1081,11 +1084,11 @@ public class ZKRMStateStore extends RMStateStore {
long startTime = System.currentTimeMillis();
synchronized (ZKRMStateStore.this) {
while (zkClient == null) {
ZKRMStateStore.this.wait(zkSessionTimeout);
ZKRMStateStore.this.wait(zkResyncWaitTime);
if (zkClient != null) {
break;
}
if (System.currentTimeMillis() - startTime > zkSessionTimeout) {
if (System.currentTimeMillis() - startTime > zkResyncWaitTime) {
throw new IOException("Wait for ZKClient creation timed out");
}
}

View File

@ -170,10 +170,10 @@ public class TestZKRMStateStoreZKClientConnections extends
throws Exception {
TestZKClient zkClientTester = new TestZKClient();
String path = "/test";
final String path = "/test";
YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, ZK_TIMEOUT_MS);
ZKRMStateStore store =
final ZKRMStateStore store =
(ZKRMStateStore) zkClientTester.getRMStateStore(conf);
TestDispatcher dispatcher = new TestDispatcher();
store.setRMDispatcher(dispatcher);
@ -185,14 +185,20 @@ public class TestZKRMStateStoreZKClientConnections extends
store.setDataWithRetries(path, "newBytes".getBytes(), 0);
stopServer();
final AtomicBoolean isSucceeded = new AtomicBoolean(false);
zkClientTester.watcher.waitForDisconnected(ZK_OP_WAIT_TIME);
try {
store.getDataWithRetries(path, true);
fail("Expected ZKClient time out exception");
} catch (Exception e) {
assertTrue(e.getMessage().contains(
"Wait for ZKClient creation timed out"));
}
Thread thread = new Thread() {
@Override
public void run() {
try {
store.getDataWithRetries(path, true);
isSucceeded.set(true);
} catch (Exception e) {
isSucceeded.set(false);
}
}
};
thread.start();
// ZKRMStateStore Session restored
startServer();
@ -206,6 +212,8 @@ public class TestZKRMStateStoreZKClientConnections extends
fail(error);
}
assertEquals("newBytes", new String(ret));
thread.join();
assertTrue(isSucceeded.get());
}
@Test(timeout = 20000)