mirror of https://github.com/apache/lucene.git
SOLR-8034: Leader no longer puts replicas in recovery in case of a failed update, when minRF isn't achieved.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1703289 13f79535-47bb-0310-9956-ffa450edef68
parent e91c414ba0
commit c14de34a90
CHANGES.txt
@@ -264,6 +264,9 @@ Other Changes
 * SOLR-7999: SolrRequestParser tests no longer depend on external URLs
   that may fail to work. (Uwe Schindler)
 
+* SOLR-8034: Leader no longer puts replicas in recovery in case of a failed update, when minRF
+  isn't achieved. (Jessica Cheng, Timothy Potter, Anshum Gupta)
+
 ================== 5.3.1 ==================
 
 Bug Fixes
DistributedUpdateProcessor.java
@@ -870,6 +870,11 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
         }
       }
 
+      // If the client specified minRf and we didn't achieve the minRf, don't send recovery and let client retry
+      if (replicationTracker != null && replicationTracker.getAchievedRf() < replicationTracker.minRf) {
+        continue;
+      }
+
       if (cloudDesc.getCoreNodeName().equals(leaderCoreNodeName) && foundErrorNodeInReplicaList) {
         try {
           // if false, then the node is probably not "live" anymore
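What this buys the client: instead of the leader forcing out-of-sync replicas into recovery after a partial update, the update simply reports the achieved replication factor and the client decides whether to retry. Below is a minimal, hypothetical SolrJ sketch of that retry loop (not part of this commit); it reuses the calls exercised by HttpPartitionTest further down (UpdateRequest.MIN_REPFACT, CloudSolrClient.getMinAchievedReplicationFactor), while the retry budget and backoff are illustrative assumptions.

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;

public class MinRfRetryClient {
  // Indexes one doc, retrying while the achieved replication factor is below minRf.
  static int addWithMinRf(CloudSolrClient client, String collection,
                          SolrInputDocument doc, int minRf) throws Exception {
    UpdateRequest up = new UpdateRequest();
    up.setParam(UpdateRequest.MIN_REPFACT, String.valueOf(minRf)); // ask Solr to report rf
    up.add(doc);
    int achieved = -1;
    for (int attempt = 0; attempt < 3; attempt++) {   // illustrative retry budget
      NamedList rsp = client.request(up, collection);
      achieved = client.getMinAchievedReplicationFactor(collection, rsp);
      if (achieved >= minRf) {
        return achieved;                              // enough replicas acknowledged the update
      }
      Thread.sleep(2000);                             // illustrative backoff before retrying
    }
    throw new IllegalStateException("min_rf=" + minRf + " not achieved; last rf=" + achieved);
  }
}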
HttpPartitionTest.java
@@ -25,6 +25,7 @@ import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
@@ -38,7 +39,6 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.servlet.SolrDispatchFilter;
-import org.apache.solr.update.UpdateHandler;
 import org.apache.solr.update.UpdateLog;
 import org.apache.solr.util.RTimer;
 import org.junit.Test;
@@ -50,7 +50,6 @@ import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -99,6 +98,12 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
 
     testLeaderInitiatedRecoveryCRUD();
 
+    // Tests that if we set a minRf that's not satisfied, no recovery is requested, but if minRf is satisfied,
+    // recovery is requested
+    testMinRf();
+
+    waitForThingsToLevelOut(30000);
+
     // test a 1x2 collection
     testRf2();
 
@@ -164,6 +169,90 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
     }
   }
 
+  protected void testMinRf() throws Exception {
+    // create a collection that has 1 shard and 3 replicas
+    String testCollectionName = "collMinRf_1x3";
+    createCollection(testCollectionName, 1, 3, 1);
+    cloudClient.setDefaultCollection(testCollectionName);
+
+    sendDoc(1, 2);
+
+    List<Replica> notLeaders =
+        ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
+    assertTrue("Expected 2 non-leader replicas for collection " + testCollectionName
+        + " but found " + notLeaders.size() + "; clusterState: "
+        + printClusterStateInfo(testCollectionName),
+        notLeaders.size() == 2);
+
+    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
+
+    // Now introduce a network partition between the leader and 1 replica, so a minRf of 2 is still achieved
+    SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
+
+    proxy0.close();
+
+    // indexing during a partition
+    int achievedRf = sendDoc(2, 2);
+    assertEquals("Unexpected achieved replication factor", 2, achievedRf);
+
+    Thread.sleep(sleepMsBeforeHealPartition);
+
+    // Verify that the partitioned replica is DOWN
+    ZkStateReader zkr = cloudClient.getZkStateReader();
+    zkr.updateClusterState(); // force the state to be fresh
+    ClusterState cs = zkr.getClusterState();
+    Collection<Slice> slices = cs.getActiveSlices(testCollectionName);
+    Slice slice = slices.iterator().next();
+    Replica partitionedReplica = slice.getReplica(notLeaders.get(0).getName());
+    assertEquals("The partitioned replica did not get marked down",
+        Replica.State.DOWN.toString(), partitionedReplica.getStr(ZkStateReader.STATE_PROP));
+
+    proxy0.reopen();
+
+    notLeaders =
+        ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
+
+    // Since minRf is achieved, we expect recovery, so we expect seeing 2 documents
+    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);
+
+    // Now introduce a network partition between the leader and both of its replicas, so a minRf of 2 is NOT achieved
+    proxy0 = getProxyForReplica(notLeaders.get(0));
+    proxy0.close();
+    SocketProxy proxy1 = getProxyForReplica(notLeaders.get(1));
+    proxy1.close();
+
+    achievedRf = sendDoc(3, 2);
+    assertEquals("Unexpected achieved replication factor", 1, achievedRf);
+
+    Thread.sleep(sleepMsBeforeHealPartition);
+
+    // Verify that the partitioned replicas are NOT DOWN since minRf wasn't achieved
+    ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, 1);
+
+    proxy0.reopen();
+    proxy1.reopen();
+
+    notLeaders =
+        ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
+
+    // Check that doc 3 is on the leader but not on the notLeaders
+    Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
+    HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName);
+    assertDocExists(leaderSolr, testCollectionName, "3");
+
+    for (Replica notLeader : notLeaders) {
+      HttpSolrClient notLeaderSolr = getHttpSolrClient(notLeader, testCollectionName);
+      assertDocNotExists(notLeaderSolr, testCollectionName, "3");
+    }
+
+    // Retry sending doc 3
+    achievedRf = sendDoc(3, 2);
+    assertEquals("Unexpected achieved replication factor", 3, achievedRf);
+
+    // Now doc 3 should be on all replicas
+    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
+  }
+
   protected void testRf2() throws Exception {
     // create a collection that has 1 shard but 2 replicas
     String testCollectionName = "c8n_1x2";
@@ -487,12 +576,22 @@
     return new HttpSolrClient(url);
   }
 
-  protected void sendDoc(int docId) throws Exception {
+  protected int sendDoc(int docId) throws Exception {
+    return sendDoc(docId, null);
+  }
+
+  protected int sendDoc(int docId, Integer minRf) throws Exception {
     SolrInputDocument doc = new SolrInputDocument();
     doc.addField(id, String.valueOf(docId));
     doc.addField("a_t", "hello" + docId);
 
-    sendDocsWithRetry(Collections.singletonList(doc), 2, 3, 100);
+    UpdateRequest up = new UpdateRequest();
+    if (minRf != null) {
+      up.setParam(UpdateRequest.MIN_REPFACT, String.valueOf(minRf));
+    }
+    up.add(doc);
+
+    return cloudClient.getMinAchievedReplicationFactor(cloudClient.getDefaultCollection(), cloudClient.request(up));
   }
 
   /**
@@ -501,13 +600,24 @@
    */
   @SuppressWarnings("rawtypes")
   protected void assertDocExists(HttpSolrClient solr, String coll, String docId) throws Exception {
-    QueryRequest qr = new QueryRequest(params("qt", "/get", "id", docId, "distrib", "false"));
-    NamedList rsp = solr.request(qr);
+    NamedList rsp = realTimeGetDocId(solr, docId);
     String match = JSONTestUtil.matchObj("/id", rsp.get("doc"), new Integer(docId));
     assertTrue("Doc with id=" + docId + " not found in " + solr.getBaseURL()
         + " due to: " + match + "; rsp="+rsp, match == null);
   }
 
+  protected void assertDocNotExists(HttpSolrClient solr, String coll, String docId) throws Exception {
+    NamedList rsp = realTimeGetDocId(solr, docId);
+    String match = JSONTestUtil.matchObj("/id", rsp.get("doc"), new Integer(docId));
+    assertTrue("Doc with id=" + docId + " is found in " + solr.getBaseURL()
+        + " due to: " + match + "; rsp="+rsp, match != null);
+  }
+
+  private NamedList realTimeGetDocId(HttpSolrClient solr, String docId) throws SolrServerException, IOException {
+    QueryRequest qr = new QueryRequest(params("qt", "/get", "id", docId, "distrib", "false"));
+    return solr.request(qr);
+  }
+
   protected int getReplicaPort(Replica replica) {
     String replicaNode = replica.getNodeName();
     String tmp = replicaNode.substring(replicaNode.indexOf(':')+1);
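A note on the real-time get helpers above: the test indexes without committing, so an ordinary query would not see the docs; routing to the /get handler reads them straight from the update log, and distrib=false pins the lookup to the single core behind the client's base URL, which is what lets the test interrogate the leader and each replica separately. A standalone, hypothetical probe under those assumptions follows (node URL and core name are placeholders, not from this commit):

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.util.NamedList;

public class RealTimeGetProbe {
  public static void main(String[] args) throws Exception {
    // Assumed URL: a core of the collMinRf_1x3 collection hosted on this node.
    HttpSolrClient solr = new HttpSolrClient("http://127.0.0.1:8983/solr/collMinRf_1x3");
    SolrQuery q = new SolrQuery();
    q.set("qt", "/get");        // real-time get handler: sees uncommitted docs
    q.set("id", "3");
    q.set("distrib", "false");  // query only this core, not the whole collection
    NamedList rsp = solr.request(new QueryRequest(q));
    System.out.println(rsp.get("doc") != null ? "doc 3 present" : "doc 3 absent");
    solr.close();
  }
}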