YARN-3798. ZKRMStateStore shouldn't create new session without occurrance of SESSIONEXPIED. (ozawa and Varun Saxena)

This commit is contained in:
Tsuyoshi Ozawa 2015-10-21 23:08:02 +09:00
parent ac865de725
commit b898f8014f
2 changed files with 79 additions and 2 deletions

View File

@ -45,6 +45,9 @@ Release 2.6.2 - UNRELEASED
YARN-3896. RMNode transitioned from RUNNING to REBOOTED because its response id YARN-3896. RMNode transitioned from RUNNING to REBOOTED because its response id
has not been reset synchronously. (Jun Gong via rohithsharmaks) has not been reset synchronously. (Jun Gong via rohithsharmaks)
YARN-3798. ZKRMStateStore shouldn't create new session without occurrance of
SESSIONEXPIED. (ozawa and Varun Saxena)
Release 2.6.1 - 2015-09-23 Release 2.6.1 - 2015-09-23
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -28,6 +28,8 @@ import java.security.SecureRandom;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -63,6 +65,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.Ap
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.EpochPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.EpochPBImpl;
import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.Code; import org.apache.zookeeper.KeeperException.Code;
@ -110,6 +113,20 @@ public class ZKRMStateStore extends RMStateStore {
private List<ACL> zkAcl; private List<ACL> zkAcl;
private List<ZKUtil.ZKAuthInfo> zkAuths; private List<ZKUtil.ZKAuthInfo> zkAuths;
class ZKSyncOperationCallback implements AsyncCallback.VoidCallback {
public final CountDownLatch latch = new CountDownLatch(1);
@Override
public void processResult(int rc, String path, Object ctx){
if (rc == Code.OK.intValue()) {
LOG.info("ZooKeeper sync operation succeeded. path: " + path);
latch.countDown();
} else {
LOG.fatal("ZooKeeper sync operation failed. Waiting for session " +
"timeout. path: " + path);
}
}
}
/** /**
* *
* ROOT_DIR_PATH * ROOT_DIR_PATH
@ -295,6 +312,7 @@ public class ZKRMStateStore extends RMStateStore {
createRootDir(delegationTokensRootPath); createRootDir(delegationTokensRootPath);
createRootDir(dtSequenceNumberPath); createRootDir(dtSequenceNumberPath);
createRootDir(amrmTokenSecretManagerRoot); createRootDir(amrmTokenSecretManagerRoot);
syncInternal(zkRootNodePath);
} }
private void createRootDir(final String rootPath) throws Exception { private void createRootDir(final String rootPath) throws Exception {
@ -915,6 +933,7 @@ public class ZKRMStateStore extends RMStateStore {
// call listener to reconnect // call listener to reconnect
LOG.info("ZKRMStateStore Session expired"); LOG.info("ZKRMStateStore Session expired");
createConnection(); createConnection();
syncInternal(event.getPath());
break; break;
default: default:
LOG.error("Unexpected Zookeeper" + LOG.error("Unexpected Zookeeper" +
@ -931,6 +950,27 @@ public class ZKRMStateStore extends RMStateStore {
return (root + "/" + nodeName); return (root + "/" + nodeName);
} }
/**
* Helper method to call ZK's sync() after calling createConnection().
* Note that sync path is meaningless for now:
* http://mail-archives.apache.org/mod_mbox/zookeeper-user/201102.mbox/browser
* @param path path to sync, nullable value. If the path is null,
* zkRootNodePath is used to sync.
* @return true if ZK.sync() succeededs, false if ZK.sync() fails.
* @throws InterruptedException
*/
private boolean syncInternal(String path) throws InterruptedException {
ZKSyncOperationCallback cb = new ZKSyncOperationCallback();
if (path != null) {
zkClient.sync(path, cb, null);
} else {
zkClient.sync(zkRootNodePath, cb, null);
}
boolean succeededToSync = cb.latch.await(
zkSessionTimeout, TimeUnit.MILLISECONDS);
return succeededToSync;
}
/** /**
* Helper method that creates fencing node, executes the passed operations, * Helper method that creates fencing node, executes the passed operations,
* and deletes the fencing node. * and deletes the fencing node.
@ -1091,6 +1131,18 @@ public class ZKRMStateStore extends RMStateStore {
switch (code) { switch (code) {
case CONNECTIONLOSS: case CONNECTIONLOSS:
case OPERATIONTIMEOUT: case OPERATIONTIMEOUT:
return true;
default:
break;
}
return false;
}
private boolean shouldRetryWithNewConnection(Code code) {
// For fast recovery, we choose to close current connection after
// SESSIONMOVED occurs. Latest state of a zknode path is ensured by
// following zk.sync(path) operation.
switch (code) {
case SESSIONEXPIRED: case SESSIONEXPIRED:
case SESSIONMOVED: case SESSIONMOVED:
return true; return true;
@ -1118,12 +1170,34 @@ public class ZKRMStateStore extends RMStateStore {
return null; return null;
} }
LOG.info("Exception while executing a ZK operation.", ke); LOG.info("Exception while executing a ZK operation.", ke);
if (shouldRetry(ke.code()) && ++retry < numRetries) { retry++;
if (shouldRetry(ke.code()) && retry < numRetries) {
LOG.info("Retrying operation on ZK. Retry no. " + retry); LOG.info("Retrying operation on ZK. Retry no. " + retry);
Thread.sleep(zkRetryInterval); Thread.sleep(zkRetryInterval);
createConnection();
continue; continue;
} }
if (shouldRetryWithNewConnection(ke.code()) && retry < numRetries) {
LOG.info("Retrying operation on ZK with new Connection. " +
"Retry no. " + retry);
Thread.sleep(zkRetryInterval);
createConnection();
boolean succeededToSync = false;
try {
succeededToSync = syncInternal(ke.getPath());
} catch (InterruptedException ie) {
LOG.info("Interrupted sync operation. Giving up!");
Thread.currentThread().interrupt();
throw ke;
}
if (succeededToSync) {
// continue the operation.
continue;
} else {
// Giving up since new connection without sync can occur an
// unexpected view from the client like YARN-3798.
LOG.info("Failed to sync with ZK new connection.");
}
}
LOG.info("Maxed out ZK retries. Giving up!"); LOG.info("Maxed out ZK retries. Giving up!");
throw ke; throw ke;
} }