mirror of https://github.com/apache/lucene.git
SOLR-5721: ConnectionManager can become stuck in likeExpired.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1568337 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9c7b41ddba
commit
f8a0cae647
|
@ -328,6 +328,9 @@ Bug Fixes
|
|||
* SOLR-5675: cloud-scripts/zkcli.bat: quote option log4j
|
||||
(Günther Ruck via steffkes
|
||||
|
||||
* SOLR-5721: ConnectionManager can become stuck in likeExpired.
|
||||
(Gregory Chanan via Mark Miller)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -23,6 +23,9 @@ import org.apache.lucene.util.LuceneTestCase.Slow;
|
|||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.cloud.ConnectionManager;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.zookeeper.WatchedEvent;
|
||||
import org.apache.zookeeper.Watcher.Event.EventType;
|
||||
import org.apache.zookeeper.Watcher.Event.KeeperState;
|
||||
import org.junit.Ignore;
|
||||
|
||||
@Slow
|
||||
|
@ -66,5 +69,51 @@ public class ConnectionManagerTest extends SolrTestCaseJ4 {
|
|||
server.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void testLikelyExpired() throws Exception {
|
||||
|
||||
createTempDir();
|
||||
// setup a SolrZkClient to do some getBaseUrlForNodeName testing
|
||||
String zkDir = dataDir.getAbsolutePath() + File.separator
|
||||
+ "zookeeper/server1/data";
|
||||
|
||||
ZkTestServer server = new ZkTestServer(zkDir);
|
||||
try {
|
||||
server.run();
|
||||
|
||||
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
|
||||
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
|
||||
|
||||
SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
|
||||
ConnectionManager cm = zkClient.getConnectionManager();
|
||||
try {
|
||||
assertFalse(cm.isLikelyExpired());
|
||||
assertTrue(cm.isConnected());
|
||||
cm.process(new WatchedEvent(EventType.None, KeeperState.Disconnected, ""));
|
||||
// disconnect shouldn't immediately set likelyExpired
|
||||
assertFalse(cm.isConnected());
|
||||
assertFalse(cm.isLikelyExpired());
|
||||
|
||||
// but it should after the timeout
|
||||
Thread.sleep((long)(zkClient.getZkClientTimeout() * 1.5));
|
||||
assertFalse(cm.isConnected());
|
||||
assertTrue(cm.isLikelyExpired());
|
||||
|
||||
// even if we disconnect immediately again
|
||||
cm.process(new WatchedEvent(EventType.None, KeeperState.Disconnected, ""));
|
||||
assertFalse(cm.isConnected());
|
||||
assertTrue(cm.isLikelyExpired());
|
||||
|
||||
// reconnect -- should no longer be likely expired
|
||||
cm.process(new WatchedEvent(EventType.None, KeeperState.SyncConnected, ""));
|
||||
assertFalse(cm.isLikelyExpired());
|
||||
assertTrue(cm.isConnected());
|
||||
} finally {
|
||||
cm.close();
|
||||
zkClient.close();
|
||||
}
|
||||
} finally {
|
||||
server.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,8 +17,6 @@ package org.apache.solr.common.cloud;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Timer;
|
||||
import java.util.TimerTask;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -46,9 +44,32 @@ public class ConnectionManager implements Watcher {
|
|||
private final BeforeReconnect beforeReconnect;
|
||||
|
||||
private volatile boolean isClosed = false;
|
||||
private volatile boolean likelyExpired = true;
|
||||
|
||||
private volatile Timer disconnectedTimer;
|
||||
|
||||
// Track the likely expired state
|
||||
private static class LikelyExpiredState {
|
||||
private static LikelyExpiredState NOT_EXPIRED = new LikelyExpiredState(StateType.NOT_EXPIRED, 0);
|
||||
private static LikelyExpiredState EXPIRED = new LikelyExpiredState(StateType.EXPIRED, 0);
|
||||
|
||||
public enum StateType {
|
||||
NOT_EXPIRED, // definitely not expired
|
||||
EXPIRED, // definitely expired
|
||||
TRACKING_TIME // not sure, tracking time of last disconnect
|
||||
}
|
||||
|
||||
private StateType stateType;
|
||||
private long lastDisconnectTime;
|
||||
public LikelyExpiredState(StateType stateType, long lastDisconnectTime) {
|
||||
this.stateType = stateType;
|
||||
this.lastDisconnectTime = lastDisconnectTime;
|
||||
}
|
||||
|
||||
public boolean isLikelyExpired(long timeToExpire) {
|
||||
return stateType == StateType.EXPIRED
|
||||
|| ( stateType == StateType.TRACKING_TIME && (System.currentTimeMillis() - lastDisconnectTime > timeToExpire));
|
||||
}
|
||||
}
|
||||
|
||||
private volatile LikelyExpiredState likelyExpiredState = LikelyExpiredState.EXPIRED;
|
||||
|
||||
public ConnectionManager(String name, SolrZkClient client, String zkServerAddress, ZkClientConnectionStrategy strat, OnReconnect onConnect, BeforeReconnect beforeReconnect) {
|
||||
this.name = name;
|
||||
|
@ -60,51 +81,18 @@ public class ConnectionManager implements Watcher {
|
|||
}
|
||||
|
||||
private synchronized void connected() {
|
||||
cancelTimer();
|
||||
connected = true;
|
||||
likelyExpired = false;
|
||||
likelyExpiredState = LikelyExpiredState.NOT_EXPIRED;
|
||||
notifyAll();
|
||||
}
|
||||
|
||||
private synchronized void disconnected() {
|
||||
cancelTimer();
|
||||
if (!isClosed) {
|
||||
Timer newDcTimer = new Timer(true);
|
||||
newDcTimer.schedule(new TimerTask() {
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
likelyExpired = true;
|
||||
}
|
||||
|
||||
}, (long) (client.getZkClientTimeout() * 0.90));
|
||||
if (isClosed) {
|
||||
// we might have closed after getting by isClosed
|
||||
// and before starting the new timer
|
||||
newDcTimer.cancel();
|
||||
} else {
|
||||
disconnectedTimer = newDcTimer;
|
||||
if (isClosed) {
|
||||
// now deal with we may have been closed after getting
|
||||
// by isClosed but before setting disconnectedTimer -
|
||||
// if close happens after isClosed check this time, it
|
||||
// will handle stopping the timer
|
||||
cancelTimer();
|
||||
}
|
||||
}
|
||||
}
|
||||
connected = false;
|
||||
notifyAll();
|
||||
}
|
||||
|
||||
private void cancelTimer() {
|
||||
try {
|
||||
this.disconnectedTimer.cancel();
|
||||
} catch (NullPointerException e) {
|
||||
// fine
|
||||
} finally {
|
||||
this.disconnectedTimer = null;
|
||||
// record the time we expired unless we are already likely expired
|
||||
if (!likelyExpiredState.isLikelyExpired(0)) {
|
||||
likelyExpiredState = new LikelyExpiredState(LikelyExpiredState.StateType.TRACKING_TIME, System.currentTimeMillis());
|
||||
}
|
||||
notifyAll();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -125,13 +113,9 @@ public class ConnectionManager implements Watcher {
|
|||
connected();
|
||||
connectionStrategy.connected();
|
||||
} else if (state == KeeperState.Expired) {
|
||||
// we don't call disconnected because there
|
||||
// is no need to start the timer - if we are expired
|
||||
// likelyExpired can just be set to true
|
||||
cancelTimer();
|
||||
|
||||
// we don't call disconnected here, because we know we are expired
|
||||
connected = false;
|
||||
likelyExpired = true;
|
||||
likelyExpiredState = LikelyExpiredState.EXPIRED;
|
||||
|
||||
log.info("Our previous ZooKeeper session was expired. Attempting to reconnect to recover relationship with ZooKeeper...");
|
||||
|
||||
|
@ -206,12 +190,11 @@ public class ConnectionManager implements Watcher {
|
|||
// to avoid possible deadlock on shutdown
|
||||
public void close() {
|
||||
this.isClosed = true;
|
||||
this.likelyExpired = true;
|
||||
cancelTimer();
|
||||
this.likelyExpiredState = LikelyExpiredState.EXPIRED;
|
||||
}
|
||||
|
||||
public boolean isLikelyExpired() {
|
||||
return likelyExpired;
|
||||
return isClosed || likelyExpiredState.isLikelyExpired((long) (client.getZkClientTimeout() * 0.90));
|
||||
}
|
||||
|
||||
public synchronized void waitForConnected(long waitForConnection)
|
||||
|
|
Loading…
Reference in New Issue