mirror of https://github.com/apache/lucene.git
SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas should sync with the leader
This commit is contained in:
parent
e4eb8a870c
commit
93f9a65b1c
|
@ -156,6 +156,9 @@ Bug Fixes
|
||||||
* SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir (Steve Rowe, Amrit Sarkar via Varun Thacker)
|
* SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir (Steve Rowe, Amrit Sarkar via Varun Thacker)
|
||||||
|
|
||||||
* SOLR-10513: ConjunctionSolrSpellChecker did not work with LuceneLevenshteinDistance (Amrit Sarkar via James Dyer)
|
* SOLR-10513: ConjunctionSolrSpellChecker did not work with LuceneLevenshteinDistance (Amrit Sarkar via James Dyer)
|
||||||
|
|
||||||
|
* SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas sync with the leader
|
||||||
|
(Amrit Sarkar, Varun Thacker)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -20,6 +20,7 @@ import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
@ -36,11 +37,14 @@ import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||||
import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
|
import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
|
||||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||||
|
import org.apache.solr.client.solrj.request.CoreAdminRequest;
|
||||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.cloud.Replica;
|
import org.apache.solr.common.cloud.Replica;
|
||||||
|
import org.apache.solr.common.cloud.Slice;
|
||||||
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
|
import org.apache.solr.common.params.CoreAdminParams;
|
||||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
import org.apache.solr.common.util.ExecutorUtil;
|
import org.apache.solr.common.util.ExecutorUtil;
|
||||||
|
@ -298,6 +302,8 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
|
||||||
checkpoint, collectionName, shard);
|
checkpoint, collectionName, shard);
|
||||||
CdcrUpdateLog.CdcrLogReader reader1 = ulog.newLogReader();
|
CdcrUpdateLog.CdcrLogReader reader1 = ulog.newLogReader();
|
||||||
reader1.seek(checkpoint);
|
reader1.seek(checkpoint);
|
||||||
|
// issue asynchronous request_recovery to the follower nodes of the shards of target collection
|
||||||
|
sendRequestRecoveryToFollowers(state);
|
||||||
success = true;
|
success = true;
|
||||||
break;
|
break;
|
||||||
} else if (status == BootstrapStatus.FAILED) {
|
} else if (status == BootstrapStatus.FAILED) {
|
||||||
|
@ -411,6 +417,29 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
|
||||||
return client.request(request);
|
return client.request(request);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void sendRequestRecoveryToFollowers(CdcrReplicatorState state) throws SolrServerException, IOException {
|
||||||
|
Collection<Slice> slices = state.getClient().getZkStateReader().getClusterState().getCollection(state.getTargetCollection()).getActiveSlices();
|
||||||
|
for (Slice slice : slices) {
|
||||||
|
Collection<Replica> replicas = slice.getReplicas();
|
||||||
|
for (Replica replica : replicas) {
|
||||||
|
if (slice.getLeader().getCoreName().equals(replica.getCoreName())) {
|
||||||
|
continue; // no need to request recovery for leader
|
||||||
|
}
|
||||||
|
sendRequestRecoveryToFollower(state.getClient(), replica.getCoreName());
|
||||||
|
log.info("RequestRecovery cmd is issued by core: " + replica.getCoreName() + " of shard: " + slice.getName() +
|
||||||
|
"for target: " + state.getTargetCollection());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private NamedList sendRequestRecoveryToFollower(SolrClient client, String coreName) throws SolrServerException, IOException {
|
||||||
|
CoreAdminRequest.RequestRecovery recoverRequestCmd = new CoreAdminRequest.RequestRecovery();
|
||||||
|
recoverRequestCmd.setAction(CoreAdminParams.CoreAdminAction.REQUESTRECOVERY);
|
||||||
|
recoverRequestCmd.setCoreName(coreName);
|
||||||
|
return client.request(recoverRequestCmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private enum BootstrapStatus {
|
private enum BootstrapStatus {
|
||||||
SUBMITTED,
|
SUBMITTED,
|
||||||
RUNNING,
|
RUNNING,
|
||||||
|
|
|
@ -105,7 +105,8 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
// setup the target cluster
|
// setup the target cluster
|
||||||
target.uploadConfigSet(configset("cdcr-target"), "cdcr-target");
|
target.uploadConfigSet(configset("cdcr-target"), "cdcr-target");
|
||||||
CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 1)
|
CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 2)
|
||||||
|
.setMaxShardsPerNode(2)
|
||||||
.process(target.getSolrClient());
|
.process(target.getSolrClient());
|
||||||
CloudSolrClient targetSolrClient = target.getSolrClient();
|
CloudSolrClient targetSolrClient = target.getSolrClient();
|
||||||
targetSolrClient.setDefaultCollection("cdcr-target");
|
targetSolrClient.setDefaultCollection("cdcr-target");
|
||||||
|
@ -118,6 +119,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
|
||||||
log.info("Cdcr queue response: " + response.getResponse());
|
log.info("Cdcr queue response: " + response.getResponse());
|
||||||
long foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient);
|
long foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient);
|
||||||
assertEquals("Document mismatch on target after sync", numDocs, foundDocs);
|
assertEquals("Document mismatch on target after sync", numDocs, foundDocs);
|
||||||
|
assertTrue(CdcrTestsUtil.assertShardInSync("cdcr-target", "shard1", targetSolrClient)); // with more than 1 replica
|
||||||
|
|
||||||
params = new ModifiableSolrParams();
|
params = new ModifiableSolrParams();
|
||||||
params.set(CommonParams.ACTION, CdcrParams.CdcrAction.COLLECTIONCHECKPOINT.toString());
|
params.set(CommonParams.ACTION, CdcrParams.CdcrAction.COLLECTIONCHECKPOINT.toString());
|
||||||
|
@ -300,5 +302,4 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
|
||||||
target.shutdown();
|
target.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,11 +26,17 @@ import org.apache.solr.SolrTestCaseJ4;
|
||||||
import org.apache.solr.client.solrj.SolrQuery;
|
import org.apache.solr.client.solrj.SolrQuery;
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||||
|
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
|
import org.apache.solr.common.cloud.DocCollection;
|
||||||
|
import org.apache.solr.common.cloud.Replica;
|
||||||
|
import org.apache.solr.common.cloud.Slice;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.common.util.TimeSource;
|
||||||
import org.apache.solr.handler.CdcrParams;
|
import org.apache.solr.handler.CdcrParams;
|
||||||
|
import org.apache.solr.util.TimeOut;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -108,4 +114,36 @@ public class CdcrTestsUtil extends SolrTestCaseJ4{
|
||||||
}
|
}
|
||||||
return response != null ? response.getResults().getNumFound() : 0;
|
return response != null ? response.getResults().getNumFound() : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static boolean assertShardInSync(String collection, String shard, CloudSolrClient client) throws IOException, SolrServerException {
|
||||||
|
TimeOut waitTimeOut = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
|
||||||
|
DocCollection docCollection = client.getZkStateReader().getClusterState().getCollection(collection);
|
||||||
|
Slice correctSlice = null;
|
||||||
|
for (Slice slice : docCollection.getSlices()) {
|
||||||
|
if (shard.equals(slice.getName())) {
|
||||||
|
correctSlice = slice;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertNotNull(correctSlice);
|
||||||
|
|
||||||
|
long leaderDocCount;
|
||||||
|
try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(correctSlice.getLeader().getCoreUrl()).withHttpClient(client.getHttpClient()).build()) {
|
||||||
|
leaderDocCount = leaderClient.query(new SolrQuery("*:*").setParam("distrib", "false")).getResults().getNumFound();
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!waitTimeOut.hasTimedOut()) {
|
||||||
|
int replicasInSync = 0;
|
||||||
|
for (Replica replica : correctSlice.getReplicas()) {
|
||||||
|
try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(replica.getCoreUrl()).withHttpClient(client.getHttpClient()).build()) {
|
||||||
|
long replicaDocCount = leaderClient.query(new SolrQuery("*:*").setParam("distrib", "false")).getResults().getNumFound();
|
||||||
|
if (replicaDocCount == leaderDocCount) replicasInSync++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (replicasInSync == correctSlice.getReplicas().size()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue