SOLR-5721: ConnectionManager can become stuck in likeExpired.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1568337 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2014-02-14 15:36:00 +00:00
parent 9c7b41ddba
commit f8a0cae647
3 changed files with 88 additions and 53 deletions

View File

@ -328,6 +328,9 @@ Bug Fixes
* SOLR-5675: cloud-scripts/zkcli.bat: quote option log4j
(Günther Ruck via steffkes
* SOLR-5721: ConnectionManager can become stuck in likeExpired.
(Gregory Chanan via Mark Miller)
Optimizations
----------------------

View File

@ -23,6 +23,9 @@ import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.cloud.ConnectionManager;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher.Event.EventType;
import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.junit.Ignore;
@Slow
@ -67,4 +70,50 @@ public class ConnectionManagerTest extends SolrTestCaseJ4 {
}
}
public void testLikelyExpired() throws Exception {
createTempDir();
// setup a SolrZkClient to do some getBaseUrlForNodeName testing
String zkDir = dataDir.getAbsolutePath() + File.separator
+ "zookeeper/server1/data";
ZkTestServer server = new ZkTestServer(zkDir);
try {
server.run();
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
ConnectionManager cm = zkClient.getConnectionManager();
try {
assertFalse(cm.isLikelyExpired());
assertTrue(cm.isConnected());
cm.process(new WatchedEvent(EventType.None, KeeperState.Disconnected, ""));
// disconnect shouldn't immediately set likelyExpired
assertFalse(cm.isConnected());
assertFalse(cm.isLikelyExpired());
// but it should after the timeout
Thread.sleep((long)(zkClient.getZkClientTimeout() * 1.5));
assertFalse(cm.isConnected());
assertTrue(cm.isLikelyExpired());
// even if we disconnect immediately again
cm.process(new WatchedEvent(EventType.None, KeeperState.Disconnected, ""));
assertFalse(cm.isConnected());
assertTrue(cm.isLikelyExpired());
// reconnect -- should no longer be likely expired
cm.process(new WatchedEvent(EventType.None, KeeperState.SyncConnected, ""));
assertFalse(cm.isLikelyExpired());
assertTrue(cm.isConnected());
} finally {
cm.close();
zkClient.close();
}
} finally {
server.shutdown();
}
}
}

View File

@ -17,8 +17,6 @@ package org.apache.solr.common.cloud;
* limitations under the License.
*/
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.TimeoutException;
import org.apache.solr.common.SolrException;
@ -46,9 +44,32 @@ public class ConnectionManager implements Watcher {
private final BeforeReconnect beforeReconnect;
private volatile boolean isClosed = false;
private volatile boolean likelyExpired = true;
private volatile Timer disconnectedTimer;
// Track the likely expired state
private static class LikelyExpiredState {
private static LikelyExpiredState NOT_EXPIRED = new LikelyExpiredState(StateType.NOT_EXPIRED, 0);
private static LikelyExpiredState EXPIRED = new LikelyExpiredState(StateType.EXPIRED, 0);
public enum StateType {
NOT_EXPIRED, // definitely not expired
EXPIRED, // definitely expired
TRACKING_TIME // not sure, tracking time of last disconnect
}
private StateType stateType;
private long lastDisconnectTime;
public LikelyExpiredState(StateType stateType, long lastDisconnectTime) {
this.stateType = stateType;
this.lastDisconnectTime = lastDisconnectTime;
}
public boolean isLikelyExpired(long timeToExpire) {
return stateType == StateType.EXPIRED
|| ( stateType == StateType.TRACKING_TIME && (System.currentTimeMillis() - lastDisconnectTime > timeToExpire));
}
}
private volatile LikelyExpiredState likelyExpiredState = LikelyExpiredState.EXPIRED;
public ConnectionManager(String name, SolrZkClient client, String zkServerAddress, ZkClientConnectionStrategy strat, OnReconnect onConnect, BeforeReconnect beforeReconnect) {
this.name = name;
@ -60,53 +81,20 @@ public class ConnectionManager implements Watcher {
}
private synchronized void connected() {
cancelTimer();
connected = true;
likelyExpired = false;
likelyExpiredState = LikelyExpiredState.NOT_EXPIRED;
notifyAll();
}
private synchronized void disconnected() {
cancelTimer();
if (!isClosed) {
Timer newDcTimer = new Timer(true);
newDcTimer.schedule(new TimerTask() {
@Override
public void run() {
likelyExpired = true;
}
}, (long) (client.getZkClientTimeout() * 0.90));
if (isClosed) {
// we might have closed after getting by isClosed
// and before starting the new timer
newDcTimer.cancel();
} else {
disconnectedTimer = newDcTimer;
if (isClosed) {
// now deal with we may have been closed after getting
// by isClosed but before setting disconnectedTimer -
// if close happens after isClosed check this time, it
// will handle stopping the timer
cancelTimer();
}
}
}
connected = false;
// record the time we expired unless we are already likely expired
if (!likelyExpiredState.isLikelyExpired(0)) {
likelyExpiredState = new LikelyExpiredState(LikelyExpiredState.StateType.TRACKING_TIME, System.currentTimeMillis());
}
notifyAll();
}
private void cancelTimer() {
try {
this.disconnectedTimer.cancel();
} catch (NullPointerException e) {
// fine
} finally {
this.disconnectedTimer = null;
}
}
@Override
public void process(WatchedEvent event) {
if (log.isInfoEnabled()) {
@ -125,13 +113,9 @@ public class ConnectionManager implements Watcher {
connected();
connectionStrategy.connected();
} else if (state == KeeperState.Expired) {
// we don't call disconnected because there
// is no need to start the timer - if we are expired
// likelyExpired can just be set to true
cancelTimer();
// we don't call disconnected here, because we know we are expired
connected = false;
likelyExpired = true;
likelyExpiredState = LikelyExpiredState.EXPIRED;
log.info("Our previous ZooKeeper session was expired. Attempting to reconnect to recover relationship with ZooKeeper...");
@ -206,12 +190,11 @@ public class ConnectionManager implements Watcher {
// to avoid possible deadlock on shutdown
public void close() {
this.isClosed = true;
this.likelyExpired = true;
cancelTimer();
this.likelyExpiredState = LikelyExpiredState.EXPIRED;
}
public boolean isLikelyExpired() {
return likelyExpired;
return isClosed || likelyExpiredState.isLikelyExpired((long) (client.getZkClientTimeout() * 0.90));
}
public synchronized void waitForConnected(long waitForConnection)