diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index de748be75c5..b39fd6f91ab 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -156,6 +156,9 @@ Bug Fixes * SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir (Steve Rowe, Amrit Sarkar via Varun Thacker) * SOLR-10513: ConjunctionSolrSpellChecker did not work with LuceneLevenshteinDistance (Amrit Sarkar via James Dyer) + +* SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas should sync with the leader + (Amrit Sarkar, Varun Thacker) Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java b/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java index abd6ed72f3e..8ec3c8be0a7 100644 --- a/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java +++ b/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java @@ -20,6 +20,7 @@ import java.io.Closeable; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Locale; @@ -36,11 +37,14 @@ import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder; import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.request.CoreAdminRequest; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkCoreNodeProps; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ExecutorUtil; @@ -298,6 +302,8 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver { checkpoint, collectionName, shard); CdcrUpdateLog.CdcrLogReader reader1 = ulog.newLogReader(); reader1.seek(checkpoint); + // issue asynchronous request_recovery to the follower nodes of the shards of target collection + sendRequestRecoveryToFollowers(state); success = true; break; } else if (status == BootstrapStatus.FAILED) { @@ -411,6 +417,29 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver { return client.request(request); } + private void sendRequestRecoveryToFollowers(CdcrReplicatorState state) throws SolrServerException, IOException { + Collection slices = state.getClient().getZkStateReader().getClusterState().getCollection(state.getTargetCollection()).getActiveSlices(); + for (Slice slice : slices) { + Collection replicas = slice.getReplicas(); + for (Replica replica : replicas) { + if (slice.getLeader().getCoreName().equals(replica.getCoreName())) { + continue; // no need to request recovery for leader + } + sendRequestRecoveryToFollower(state.getClient(), replica.getCoreName()); + log.info("RequestRecovery cmd is issued by core: " + replica.getCoreName() + " of shard: " + slice.getName() + + "for target: " + state.getTargetCollection()); + } + } + } + + private NamedList sendRequestRecoveryToFollower(SolrClient client, String coreName) throws SolrServerException, IOException { + CoreAdminRequest.RequestRecovery recoverRequestCmd = new CoreAdminRequest.RequestRecovery(); + recoverRequestCmd.setAction(CoreAdminParams.CoreAdminAction.REQUESTRECOVERY); + recoverRequestCmd.setCoreName(coreName); + return client.request(recoverRequestCmd); + } + + private enum BootstrapStatus { SUBMITTED, RUNNING, diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java index cae98554908..543bd5cacfc 100644 --- a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java @@ -105,7 +105,8 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { // setup the target cluster target.uploadConfigSet(configset("cdcr-target"), "cdcr-target"); - CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 1) + CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 2) + .setMaxShardsPerNode(2) .process(target.getSolrClient()); CloudSolrClient targetSolrClient = target.getSolrClient(); targetSolrClient.setDefaultCollection("cdcr-target"); @@ -118,6 +119,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { log.info("Cdcr queue response: " + response.getResponse()); long foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient); assertEquals("Document mismatch on target after sync", numDocs, foundDocs); + assertTrue(CdcrTestsUtil.assertShardInSync("cdcr-target", "shard1", targetSolrClient)); // with more than 1 replica params = new ModifiableSolrParams(); params.set(CommonParams.ACTION, CdcrParams.CdcrAction.COLLECTIONCHECKPOINT.toString()); @@ -300,5 +302,4 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { target.shutdown(); } } - } diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java index 99aa47196e0..6a186fdb12f 100644 --- a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java @@ -26,11 +26,17 @@ import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.TimeSource; import org.apache.solr.handler.CdcrParams; +import org.apache.solr.util.TimeOut; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -108,4 +114,36 @@ public class CdcrTestsUtil extends SolrTestCaseJ4{ } return response != null ? response.getResults().getNumFound() : 0; } + + protected static boolean assertShardInSync(String collection, String shard, CloudSolrClient client) throws IOException, SolrServerException { + TimeOut waitTimeOut = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME); + DocCollection docCollection = client.getZkStateReader().getClusterState().getCollection(collection); + Slice correctSlice = null; + for (Slice slice : docCollection.getSlices()) { + if (shard.equals(slice.getName())) { + correctSlice = slice; + break; + } + } + assertNotNull(correctSlice); + + long leaderDocCount; + try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(correctSlice.getLeader().getCoreUrl()).withHttpClient(client.getHttpClient()).build()) { + leaderDocCount = leaderClient.query(new SolrQuery("*:*").setParam("distrib", "false")).getResults().getNumFound(); + } + + while (!waitTimeOut.hasTimedOut()) { + int replicasInSync = 0; + for (Replica replica : correctSlice.getReplicas()) { + try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(replica.getCoreUrl()).withHttpClient(client.getHttpClient()).build()) { + long replicaDocCount = leaderClient.query(new SolrQuery("*:*").setParam("distrib", "false")).getResults().getNumFound(); + if (replicaDocCount == leaderDocCount) replicasInSync++; + } + } + if (replicasInSync == correctSlice.getReplicas().size()) { + return true; + } + } + return false; + } }