YARN-4348. ZKRMStateStore.syncInternal shouldn't wait for sync completion for avoiding blocking ZK's event thread. (ozawa)

(cherry picked from commit 0460b8a8a3)
This commit is contained in:
Tsuyoshi Ozawa 2015-12-08 13:31:23 +09:00
parent 203c7a648b
commit b345ffd7df
2 changed files with 18 additions and 27 deletions

View File

@ -51,6 +51,9 @@ Release 2.6.3 - UNRELEASED
YARN-4365. FileSystemNodeLabelStore should check for root dir existence on YARN-4365. FileSystemNodeLabelStore should check for root dir existence on
startup (Kuhu Shukla via jlowe) startup (Kuhu Shukla via jlowe)
YARN-4348. ZKRMStateStore.syncInternal shouldn't wait for sync completion for
avoiding blocking ZK's event thread. (ozawa)
Release 2.6.2 - 2015-10-28 Release 2.6.2 - 2015-10-28
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -114,12 +114,10 @@ public class ZKRMStateStore extends RMStateStore {
private List<ZKUtil.ZKAuthInfo> zkAuths; private List<ZKUtil.ZKAuthInfo> zkAuths;
class ZKSyncOperationCallback implements AsyncCallback.VoidCallback { class ZKSyncOperationCallback implements AsyncCallback.VoidCallback {
public final CountDownLatch latch = new CountDownLatch(1);
@Override @Override
public void processResult(int rc, String path, Object ctx){ public void processResult(int rc, String path, Object ctx){
if (rc == Code.OK.intValue()) { if (rc == Code.OK.intValue()) {
LOG.info("ZooKeeper sync operation succeeded. path: " + path); LOG.info("ZooKeeper sync operation succeeded. path: " + path);
latch.countDown();
} else { } else {
LOG.fatal("ZooKeeper sync operation failed. Waiting for session " + LOG.fatal("ZooKeeper sync operation failed. Waiting for session " +
"timeout. path: " + path); "timeout. path: " + path);
@ -959,16 +957,20 @@ public class ZKRMStateStore extends RMStateStore {
* @return true if ZK.sync() succeeds, false if ZK.sync() fails. * @return true if ZK.sync() succeeds, false if ZK.sync() fails.
* @throws InterruptedException * @throws InterruptedException
*/ */
private boolean syncInternal(String path) throws InterruptedException { private void syncInternal(final String path) throws InterruptedException {
ZKSyncOperationCallback cb = new ZKSyncOperationCallback(); final ZKSyncOperationCallback cb = new ZKSyncOperationCallback();
if (path != null) { final String pathForSync = (path != null) ? path : zkRootNodePath;
zkClient.sync(path, cb, null); try {
} else { new ZKAction<Void>() {
zkClient.sync(zkRootNodePath, cb, null); @Override
Void run() throws KeeperException, InterruptedException {
zkClient.sync(pathForSync, cb, null);
return null;
}
}.runWithRetries();
} catch (Exception e) {
LOG.fatal("sync failed.");
} }
boolean succeededToSync = cb.latch.await(
zkSessionTimeout, TimeUnit.MILLISECONDS);
return succeededToSync;
} }
/** /**
@ -1181,22 +1183,8 @@ public class ZKRMStateStore extends RMStateStore {
"Retry no. " + retry); "Retry no. " + retry);
Thread.sleep(zkRetryInterval); Thread.sleep(zkRetryInterval);
createConnection(); createConnection();
boolean succeededToSync = false; syncInternal(ke.getPath());
try { continue;
succeededToSync = syncInternal(ke.getPath());
} catch (InterruptedException ie) {
LOG.info("Interrupted sync operation. Giving up!");
Thread.currentThread().interrupt();
throw ke;
}
if (succeededToSync) {
// continue the operation.
continue;
} else {
// Giving up since a new connection without sync can expose an
// unexpected view to the client, as in YARN-3798.
LOG.info("Failed to sync with ZK new connection.");
}
} }
LOG.info("Maxed out ZK retries. Giving up!"); LOG.info("Maxed out ZK retries. Giving up!");
throw ke; throw ke;