mirror of https://github.com/apache/lucene.git
SOLR-6944: ReplicationFactorTest and HttpPartitionTest both fail with org.apache.http.NoHttpResponseException: The target server failed to respond
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1656056 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
669e9cf617
commit
fd35bd5ae4
|
@ -17,14 +17,17 @@ package org.apache.solr.cloud;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.http.NoHttpResponseException;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.JSONTestUtil;
|
||||
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
|
@ -41,6 +44,7 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
@ -123,7 +127,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
protected void testLeaderInitiatedRecoveryCRUD() throws Exception {
|
||||
String testCollectionName = "c8n_crud_1x2";
|
||||
String shardId = "shard1";
|
||||
createCollection(testCollectionName, 1, 2, 1);
|
||||
createCollectionRetry(testCollectionName, 1, 2, 1);
|
||||
cloudClient.setDefaultCollection(testCollectionName);
|
||||
|
||||
Replica leader =
|
||||
|
@ -172,7 +176,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
protected void testRf2() throws Exception {
|
||||
// create a collection that has 1 shard but 2 replicas
|
||||
String testCollectionName = "c8n_1x2";
|
||||
createCollection(testCollectionName, 1, 2, 1);
|
||||
createCollectionRetry(testCollectionName, 1, 2, 1);
|
||||
cloudClient.setDefaultCollection(testCollectionName);
|
||||
|
||||
sendDoc(1);
|
||||
|
@ -253,11 +257,12 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
protected void testRf3() throws Exception {
|
||||
// create a collection that has 1 shard and 3 replicas
|
||||
String testCollectionName = "c8n_1x3";
|
||||
createCollection(testCollectionName, 1, 3, 1);
|
||||
createCollectionRetry(testCollectionName, 1, 3, 1);
|
||||
|
||||
cloudClient.setDefaultCollection(testCollectionName);
|
||||
|
||||
sendDoc(1);
|
||||
|
||||
|
||||
List<Replica> notLeaders =
|
||||
ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
|
||||
assertTrue("Expected 2 replicas for collection " + testCollectionName
|
||||
|
@ -306,11 +311,27 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
}
|
||||
}
|
||||
|
||||
private void createCollectionRetry(String testCollectionName, int numShards, int replicationFactor, int maxShardsPerNode)
|
||||
throws SolrServerException, IOException {
|
||||
CollectionAdminResponse resp = createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
|
||||
if (resp.getResponse().get("failure") != null) {
|
||||
CollectionAdminRequest.Delete req = new CollectionAdminRequest.Delete();
|
||||
req.setCollectionName(testCollectionName);
|
||||
req.process(cloudClient);
|
||||
|
||||
resp = createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
|
||||
|
||||
if (resp.getResponse().get("failure") != null) {
|
||||
fail("Could not create " + testCollectionName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// test inspired by SOLR-6511
|
||||
protected void testLeaderZkSessionLoss() throws Exception {
|
||||
|
||||
String testCollectionName = "c8n_1x2_leader_session_loss";
|
||||
createCollection(testCollectionName, 1, 2, 1);
|
||||
createCollectionRetry(testCollectionName, 1, 2, 1);
|
||||
cloudClient.setDefaultCollection(testCollectionName);
|
||||
|
||||
sendDoc(1);
|
||||
|
@ -329,7 +350,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
testCollectionName+"; clusterState: "+printClusterStateInfo(testCollectionName), leader);
|
||||
JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));
|
||||
|
||||
HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName);
|
||||
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField(id, String.valueOf(2));
|
||||
doc.addField("a_t", "hello" + 2);
|
||||
|
@ -360,7 +381,8 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
// TODO: This test logic seems to be timing dependent and fails on Jenkins
|
||||
// need to come up with a better approach
|
||||
log.info("Sending doc 2 to old leader "+leader.getName());
|
||||
try {
|
||||
try ( HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
|
||||
|
||||
leaderSolr.add(doc);
|
||||
leaderSolr.close();
|
||||
|
||||
|
@ -374,7 +396,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
try (HttpSolrClient client = getHttpSolrClient(currentLeader, testCollectionName)) {
|
||||
client.add(doc); // this should work
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<Replica> participatingReplicas = getActiveOrRecoveringReplicas(testCollectionName, "shard1");
|
||||
Set<String> replicasToCheck = new HashSet<>();
|
||||
|
@ -452,17 +474,37 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
return new HttpSolrClient(url);
|
||||
}
|
||||
|
||||
protected void sendDoc(int docId) throws Exception {
|
||||
protected void doSendDoc(int docid) throws Exception {
|
||||
UpdateRequest up = new UpdateRequest();
|
||||
up.setParam(UpdateRequest.MIN_REPFACT, String.valueOf(2));
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField(id, String.valueOf(docId));
|
||||
doc.addField("a_t", "hello" + docId);
|
||||
doc.addField(id, String.valueOf(docid));
|
||||
doc.addField("a_t", "hello" + docid);
|
||||
up.add(doc);
|
||||
int minAchievedRf =
|
||||
cloudClient.getMinAchievedReplicationFactor(cloudClient.getDefaultCollection(), cloudClient.request(up));
|
||||
}
|
||||
|
||||
protected void sendDoc(int docId) throws Exception {
|
||||
try {
|
||||
doSendDoc(docId);
|
||||
} catch (SolrServerException e) {
|
||||
if (e.getRootCause() instanceof NoHttpResponseException) {
|
||||
// we don't know if the doc was accepted or not, we send again
|
||||
Thread.sleep(100);
|
||||
try {
|
||||
doSendDoc(docId);
|
||||
} catch (SolrServerException e2) {
|
||||
if (e2.getRootCause() instanceof NoHttpResponseException) {
|
||||
// we don't know if the doc was accepted or not, we send again
|
||||
Thread.sleep(3000);
|
||||
doSendDoc(docId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Query the real-time get handler for a specific doc by ID to verify it
|
||||
* exists in the provided server, using distrib=false so it doesn't route to another replica.
|
||||
|
|
|
@ -17,11 +17,19 @@ package org.apache.solr.cloud;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.net.ServerSocket;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
||||
|
@ -30,12 +38,6 @@ import org.junit.Test;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.net.ServerSocket;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
//@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-6157")
|
||||
|
||||
/**
|
||||
|
@ -128,7 +130,20 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
String shardId = "shard1";
|
||||
int minRf = 2;
|
||||
|
||||
createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
|
||||
CollectionAdminResponse resp = createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
|
||||
|
||||
if (resp.getResponse().get("failure") != null) {
|
||||
CollectionAdminRequest.Delete req = new CollectionAdminRequest.Delete();
|
||||
req.setCollectionName(testCollectionName);
|
||||
req.process(cloudClient);
|
||||
|
||||
resp = createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
|
||||
|
||||
if (resp.getResponse().get("failure") != null) {
|
||||
fail("Could not create " + testCollectionName);
|
||||
}
|
||||
}
|
||||
|
||||
cloudClient.setDefaultCollection(testCollectionName);
|
||||
|
||||
List<Replica> replicas =
|
||||
|
@ -149,8 +164,8 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
up.add(batch);
|
||||
|
||||
Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, shardId);
|
||||
sendNonDirectUpdateRequestReplica(leader, up, 2, testCollectionName);
|
||||
sendNonDirectUpdateRequestReplica(replicas.get(0), up, 2, testCollectionName);
|
||||
sendNonDirectUpdateRequestReplicaWithRetry(leader, up, 2, testCollectionName);
|
||||
sendNonDirectUpdateRequestReplicaWithRetry(replicas.get(0), up, 2, testCollectionName);
|
||||
|
||||
// so now kill the replica of shard2 and verify the achieved rf is only 1
|
||||
List<Replica> shard2Replicas =
|
||||
|
@ -162,8 +177,8 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
Thread.sleep(2000);
|
||||
|
||||
// shard1 will have rf=2 but shard2 will only have rf=1
|
||||
sendNonDirectUpdateRequestReplica(leader, up, 1, testCollectionName);
|
||||
sendNonDirectUpdateRequestReplica(replicas.get(0), up, 1, testCollectionName);
|
||||
sendNonDirectUpdateRequestReplicaWithRetry(leader, up, 1, testCollectionName);
|
||||
sendNonDirectUpdateRequestReplicaWithRetry(replicas.get(0), up, 1, testCollectionName);
|
||||
|
||||
// heal the partition
|
||||
getProxyForReplica(shard2Replicas.get(0)).reopen();
|
||||
|
@ -171,6 +186,15 @@ public class ReplicationFactorTest extends AbstractFullDistribZkTestBase {
|
|||
Thread.sleep(2000);
|
||||
}
|
||||
|
||||
|
||||
protected void sendNonDirectUpdateRequestReplicaWithRetry(Replica replica, UpdateRequest up, int expectedRf, String collection) throws Exception {
|
||||
try {
|
||||
sendNonDirectUpdateRequestReplica(replica, up, expectedRf, collection);
|
||||
} catch (Exception e) {
|
||||
sendNonDirectUpdateRequestReplica(replica, up, expectedRf, collection);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
protected void sendNonDirectUpdateRequestReplica(Replica replica, UpdateRequest up, int expectedRf, String collection) throws Exception {
|
||||
|
||||
|
|
Loading…
Reference in New Issue