YARN-3753. RM failed to come up with "java.io.IOException: Wait for ZKClient creation timed out". Contributed by Jian He
This commit is contained in:
parent
1ff3f16ed0
commit
b34825b0cb
|
@ -142,6 +142,9 @@ Release 2.7.1 - UNRELEASED
|
|||
YARN-3725. App submission via REST API is broken in secure mode due to
|
||||
Timeline DT service address is empty. (Zhijie Shen via wangda)
|
||||
|
||||
YARN-3753. RM failed to come up with "java.io.IOException: Wait for
|
||||
ZKClient creation timed out”. (Jian He via xgong)
|
||||
|
||||
Release 2.7.0 - 2015-04-20
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -104,6 +104,8 @@ public class ZKRMStateStore extends RMStateStore {
|
|||
|
||||
private String zkHostPort = null;
|
||||
private int zkSessionTimeout;
|
||||
// wait time for zkClient to re-establish connection with zk-server.
|
||||
private long zkResyncWaitTime;
|
||||
|
||||
@VisibleForTesting
|
||||
long zkRetryInterval;
|
||||
|
@ -234,6 +236,7 @@ public class ZKRMStateStore extends RMStateStore {
|
|||
conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
|
||||
YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
|
||||
}
|
||||
zkResyncWaitTime = zkRetryInterval * numRetries;
|
||||
|
||||
zkAcl = RMZKUtils.getZKAcls(conf);
|
||||
zkAuths = RMZKUtils.getZKAuths(conf);
|
||||
|
@ -1081,11 +1084,11 @@ public class ZKRMStateStore extends RMStateStore {
|
|||
long startTime = System.currentTimeMillis();
|
||||
synchronized (ZKRMStateStore.this) {
|
||||
while (zkClient == null) {
|
||||
ZKRMStateStore.this.wait(zkSessionTimeout);
|
||||
ZKRMStateStore.this.wait(zkResyncWaitTime);
|
||||
if (zkClient != null) {
|
||||
break;
|
||||
}
|
||||
if (System.currentTimeMillis() - startTime > zkSessionTimeout) {
|
||||
if (System.currentTimeMillis() - startTime > zkResyncWaitTime) {
|
||||
throw new IOException("Wait for ZKClient creation timed out");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -170,10 +170,10 @@ public class TestZKRMStateStoreZKClientConnections extends
|
|||
throws Exception {
|
||||
|
||||
TestZKClient zkClientTester = new TestZKClient();
|
||||
String path = "/test";
|
||||
final String path = "/test";
|
||||
YarnConfiguration conf = new YarnConfiguration();
|
||||
conf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, ZK_TIMEOUT_MS);
|
||||
ZKRMStateStore store =
|
||||
final ZKRMStateStore store =
|
||||
(ZKRMStateStore) zkClientTester.getRMStateStore(conf);
|
||||
TestDispatcher dispatcher = new TestDispatcher();
|
||||
store.setRMDispatcher(dispatcher);
|
||||
|
@ -185,14 +185,20 @@ public class TestZKRMStateStoreZKClientConnections extends
|
|||
store.setDataWithRetries(path, "newBytes".getBytes(), 0);
|
||||
|
||||
stopServer();
|
||||
final AtomicBoolean isSucceeded = new AtomicBoolean(false);
|
||||
zkClientTester.watcher.waitForDisconnected(ZK_OP_WAIT_TIME);
|
||||
try {
|
||||
store.getDataWithRetries(path, true);
|
||||
fail("Expected ZKClient time out exception");
|
||||
} catch (Exception e) {
|
||||
assertTrue(e.getMessage().contains(
|
||||
"Wait for ZKClient creation timed out"));
|
||||
}
|
||||
Thread thread = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
store.getDataWithRetries(path, true);
|
||||
isSucceeded.set(true);
|
||||
} catch (Exception e) {
|
||||
isSucceeded.set(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
thread.start();
|
||||
|
||||
// ZKRMStateStore Session restored
|
||||
startServer();
|
||||
|
@ -206,6 +212,8 @@ public class TestZKRMStateStoreZKClientConnections extends
|
|||
fail(error);
|
||||
}
|
||||
assertEquals("newBytes", new String(ret));
|
||||
thread.join();
|
||||
assertTrue(isSucceeded.get());
|
||||
}
|
||||
|
||||
@Test(timeout = 20000)
|
||||
|
|
Loading…
Reference in New Issue