SOLR-6405: ZooKeeper calls can easily not be retried enough on ConnectionLoss.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1619810 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2014-08-22 14:02:06 +00:00
parent aad543ee1e
commit f06d3bf977
3 changed files with 50 additions and 6 deletions

View File

@ -340,7 +340,10 @@ Bug Fixes
* SOLR-6402: OverseerCollectionProcessor should not exit for ZooKeeper ConnectionLoss.
(Jessica Cheng via Mark Miller)
* SOLR-6405: ZooKeeper calls can easily not be retried enough on ConnectionLoss.
(Jessica Cheng, Mark Miller)
Optimizations
---------------------

View File

@ -24,6 +24,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import junit.framework.Assert;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCmdExecutor;
import org.apache.solr.common.cloud.ZkOperation;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
@ -198,6 +200,42 @@ public class ZkSolrClientTest extends AbstractSolrTestCase {
}
}
}
public void testZkCmdExectutor() throws Exception {
String zkDir = createTempDir("zkData").getAbsolutePath();
ZkTestServer server = null;
try {
server = new ZkTestServer(zkDir);
server.run();
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
final int timeout = random().nextInt(10000) + 5000;
ZkCmdExecutor zkCmdExecutor = new ZkCmdExecutor(timeout);
final long start = System.nanoTime();
try {
zkCmdExecutor.retryOperation(new ZkOperation() {
@Override
public String execute() throws KeeperException, InterruptedException {
if (System.nanoTime() - start > TimeUnit.NANOSECONDS.convert(timeout, TimeUnit.MILLISECONDS)) {
throw new KeeperException.SessionExpiredException();
}
throw new KeeperException.ConnectionLossException();
}
});
} catch(KeeperException.SessionExpiredException e) {
} catch (Exception e) {
fail("Expected " + KeeperException.SessionExpiredException.class.getSimpleName() + " but got " + e.getClass().getSimpleName());
}
} finally {
if (server != null) {
server.shutdown();
}
}
}
public void testMultipleWatchesAsync() throws Exception {
try (ZkConnection conn = new ZkConnection ()) {

View File

@ -27,9 +27,10 @@ import org.apache.zookeeper.data.ACL;
public class ZkCmdExecutor {
private long retryDelay = 1500L; // 500 ms over for padding
private long retryDelay = 1500L; // 1500 ms over for padding
private int retryCount;
private List<ACL> acl = ZooDefs.Ids.OPEN_ACL_UNSAFE;
private double timeouts;
/**
* TODO: At this point, this should probably take a SolrZkClient in
@ -40,8 +41,8 @@ public class ZkCmdExecutor {
* with this class.
*/
public ZkCmdExecutor(int timeoutms) {
double timeouts = timeoutms / 1000.0;
this.retryCount = Math.round(0.5f * ((float)Math.sqrt(8.0f * timeouts + 1.0f) - 1.0f));
timeouts = timeoutms / 1000.0;
this.retryCount = Math.round(0.5f * ((float)Math.sqrt(8.0f * timeouts + 1.0f) - 1.0f)) + 1;
}
public List<ACL> getAcl() {
@ -84,7 +85,9 @@ public class ZkCmdExecutor {
throw exception;
}
}
retryDelay(i);
if (i != retryCount -1) {
retryDelay(i);
}
}
}
throw exception;
@ -116,7 +119,7 @@ public class ZkCmdExecutor {
*/
protected void retryDelay(int attemptCount) throws InterruptedException {
if (attemptCount > 0) {
Thread.sleep(attemptCount * retryDelay);
Thread.sleep((attemptCount + 1) * retryDelay);
}
}