SOLR-6592: Avoid waiting for the leader to see the down state if that leader is not live.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1629719 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Timothy Potter 2014-10-06 17:50:00 +00:00
parent 19a2383920
commit ac7621e607
2 changed files with 33 additions and 0 deletions

View File

@ -20,8 +20,11 @@ package org.apache.solr.cloud;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.ConnectException;
import java.net.InetAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URLEncoder;
import java.net.UnknownHostException;
import java.util.ArrayList;
@ -41,6 +44,8 @@ import java.util.concurrent.TimeoutException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.http.NoHttpResponseException;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.CoreAdminRequest.WaitForState;
import org.apache.solr.common.SolrException;
@ -1623,6 +1628,23 @@ public final class ZkController {
server.request(prepCmd);
break;
} catch (Exception e) {
// if the core container is shutdown, don't wait
if (cc.isShutDown()) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE,
"Core container is shutdown.");
}
Throwable rootCause = SolrException.getRootCause(e);
if (rootCause instanceof IOException) {
// if there was a communication error talking to the leader, see if the leader is even alive
if (!zkStateReader.getClusterState().liveNodesContain(leaderProps.getNodeName())) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE,
"Node "+leaderProps.getNodeName()+" hosting leader for "+
shard+" in "+collection+" is not live!");
}
}
SolrException.log(log,
"There was a problem making a request to the leader", e);
try {

View File

@ -117,6 +117,10 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
// have the leader lose its Zk session temporarily
testLeaderZkSessionLoss();
waitForThingsToLevelOut(30000);
log.info("HttpParitionTest succeeded ... shutting down now!");
}
protected void testRf2() throws Exception {
@ -187,6 +191,8 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
// verify all docs received
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, numDocs + 3);
log.info("testRf2 succeeded ... deleting the "+testCollectionName+" collection");
// try to clean up
try {
CollectionAdminRequest req = new CollectionAdminRequest.Delete();
@ -240,6 +246,9 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
sendDoc(4);
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 4);
log.info("testRf3 succeeded ... deleting the "+testCollectionName+" collection");
// try to clean up
try {
CollectionAdminRequest req = new CollectionAdminRequest.Delete();
@ -334,6 +343,8 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
waitToSeeReplicasActive(testCollectionName, "shard1", replicasToCheck, 20);
assertDocsExistInAllReplicas(participatingReplicas, testCollectionName, 1, 2);
log.info("testLeaderZkSessionLoss succeeded ... deleting the "+testCollectionName+" collection");
// try to clean up
try {
CollectionAdminRequest req = new CollectionAdminRequest.Delete();