mirror of https://github.com/apache/lucene.git
SOLR-6405: ZooKeeper calls can easily not be retried enough on ConnectionLoss.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1619810 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aad543ee1e
commit
f06d3bf977
|
@ -340,7 +340,10 @@ Bug Fixes
|
|||
|
||||
* SOLR-6402: OverseerCollectionProcessor should not exit for ZooKeeper ConnectionLoss.
|
||||
(Jessica Cheng via Mark Miller)
|
||||
|
||||
|
||||
* SOLR-6405: ZooKeeper calls can easily not be retried enough on ConnectionLoss.
|
||||
(Jessica Cheng, Mark Miller)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -24,6 +24,8 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|||
import junit.framework.Assert;
|
||||
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkCmdExecutor;
|
||||
import org.apache.solr.common.cloud.ZkOperation;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.WatchedEvent;
|
||||
|
@ -198,6 +200,42 @@ public class ZkSolrClientTest extends AbstractSolrTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testZkCmdExectutor() throws Exception {
|
||||
String zkDir = createTempDir("zkData").getAbsolutePath();
|
||||
ZkTestServer server = null;
|
||||
|
||||
try {
|
||||
server = new ZkTestServer(zkDir);
|
||||
server.run();
|
||||
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
|
||||
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
|
||||
|
||||
final int timeout = random().nextInt(10000) + 5000;
|
||||
|
||||
ZkCmdExecutor zkCmdExecutor = new ZkCmdExecutor(timeout);
|
||||
final long start = System.nanoTime();
|
||||
try {
|
||||
zkCmdExecutor.retryOperation(new ZkOperation() {
|
||||
@Override
|
||||
public String execute() throws KeeperException, InterruptedException {
|
||||
if (System.nanoTime() - start > TimeUnit.NANOSECONDS.convert(timeout, TimeUnit.MILLISECONDS)) {
|
||||
throw new KeeperException.SessionExpiredException();
|
||||
}
|
||||
throw new KeeperException.ConnectionLossException();
|
||||
}
|
||||
});
|
||||
} catch(KeeperException.SessionExpiredException e) {
|
||||
|
||||
} catch (Exception e) {
|
||||
fail("Expected " + KeeperException.SessionExpiredException.class.getSimpleName() + " but got " + e.getClass().getSimpleName());
|
||||
}
|
||||
} finally {
|
||||
if (server != null) {
|
||||
server.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testMultipleWatchesAsync() throws Exception {
|
||||
try (ZkConnection conn = new ZkConnection ()) {
|
||||
|
|
|
@ -27,9 +27,10 @@ import org.apache.zookeeper.data.ACL;
|
|||
|
||||
|
||||
public class ZkCmdExecutor {
|
||||
private long retryDelay = 1500L; // 500 ms over for padding
|
||||
private long retryDelay = 1500L; // 1500 ms over for padding
|
||||
private int retryCount;
|
||||
private List<ACL> acl = ZooDefs.Ids.OPEN_ACL_UNSAFE;
|
||||
private double timeouts;
|
||||
|
||||
/**
|
||||
* TODO: At this point, this should probably take a SolrZkClient in
|
||||
|
@ -40,8 +41,8 @@ public class ZkCmdExecutor {
|
|||
* with this class.
|
||||
*/
|
||||
public ZkCmdExecutor(int timeoutms) {
|
||||
double timeouts = timeoutms / 1000.0;
|
||||
this.retryCount = Math.round(0.5f * ((float)Math.sqrt(8.0f * timeouts + 1.0f) - 1.0f));
|
||||
timeouts = timeoutms / 1000.0;
|
||||
this.retryCount = Math.round(0.5f * ((float)Math.sqrt(8.0f * timeouts + 1.0f) - 1.0f)) + 1;
|
||||
}
|
||||
|
||||
public List<ACL> getAcl() {
|
||||
|
@ -84,7 +85,9 @@ public class ZkCmdExecutor {
|
|||
throw exception;
|
||||
}
|
||||
}
|
||||
retryDelay(i);
|
||||
if (i != retryCount -1) {
|
||||
retryDelay(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
throw exception;
|
||||
|
@ -116,7 +119,7 @@ public class ZkCmdExecutor {
|
|||
*/
|
||||
protected void retryDelay(int attemptCount) throws InterruptedException {
|
||||
if (attemptCount > 0) {
|
||||
Thread.sleep(attemptCount * retryDelay);
|
||||
Thread.sleep((attemptCount + 1) * retryDelay);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue