SOLR-12065: A successful restore collection should mark the shard state as active and not buffering

Varun Thacker 2018-04-12 08:18:35 -07:00
parent e6b65151b6
commit 7a57ca8c0d
3 changed files with 55 additions and 18 deletions
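For context, a minimal SolrJ sketch (not part of this commit; the collection name, backup name, location, and ZooKeeper address are placeholders) of the behavior the fix guarantees: once a restore completes successfully, every shard of the restored collection should report ACTIVE rather than staying in a buffering/recovery state.

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;

public class VerifyRestoredShardState {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client = new CloudSolrClient.Builder()
        .withZkHost("localhost:9983").build()) {           // placeholder ZooKeeper address
      client.connect();

      // Restore a previously created backup into a new collection (placeholder names).
      CollectionAdminRequest.restoreCollection("restored_collection", "mybackup")
          .setLocation("/backups")                          // placeholder backup location
          .process(client);

      // With SOLR-12065, every shard of the restored collection ends up ACTIVE.
      DocCollection coll = client.getZkStateReader().getClusterState()
          .getCollection("restored_collection");
      for (Slice slice : coll.getSlices()) {
        System.out.println(slice.getName() + " -> " + slice.getState());
      }
    }
  }
}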


@@ -144,6 +144,9 @@ Bug Fixes
* SOLR-12214: Leader may skip publish itself as ACTIVE when its last published state is DOWN (Cao Manh Dat)
* SOLR-12065: A successful restore collection should mark the shard state as active and not buffering
(Rohit, Varun Thacker)
Optimizations
----------------------
@@ -1991,7 +1994,7 @@ Bug Fixes
* SOLR-11024: ParallelStream should set the StreamContext when constructing SolrStreams (Joel Bernstein)
* SOLR-10908: CloudSolrStream.toExpression incorrectly handles fq clauses (Rohit Singh via Erick Erickson)
* SOLR-10908: CloudSolrStream.toExpression incorrectly handles fq clauses (Rohit via Erick Erickson)
* SOLR-11177: CoreContainer.load needs to send lazily loaded core descriptors to the proper list rather than send
them all to the transient lists. (Erick Erickson)


@@ -258,7 +258,6 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
propMap.put(ASYNC, asyncId);
}
ocmh.addPropertyParams(message, propMap);
ocmh.addReplica(clusterState, new ZkNodeProps(propMap), new NamedList(), null);
}
@@ -272,11 +271,31 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
params.set(NAME, "snapshot." + slice.getName());
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
ocmh.sliceCmd(clusterState, params, null, slice, shardHandler, asyncId, requestMap);
}
ocmh.processResponses(new NamedList(), shardHandler, true, "Could not restore core", asyncId, requestMap);
for (Slice s: restoreCollection.getSlices()) {
  for (Replica r : s.getReplicas()) {
    String nodeName = r.getNodeName();
    String coreNodeName = r.getCoreName();
    Replica.State stateRep = r.getState();
    log.debug("Calling REQUESTAPPLYUPDATES on: nodeName={}, coreNodeName={}, state={}"
        , nodeName, coreNodeName, stateRep.name());
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
    params.set(CoreAdminParams.NAME, coreNodeName);
    ocmh.sendShardRequest(nodeName, params, shardHandler, asyncId, requestMap);
  }
  ocmh.processResponses(new NamedList(), shardHandler, true, "REQUESTAPPLYUPDATES calls did not succeed", asyncId, requestMap);
}
//Mark all shards in ACTIVE STATE
{
  HashMap<String, Object> propMap = new HashMap<>();
@@ -288,9 +307,6 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
  inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
}
//refresh the location copy of collection state
restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
if (totalReplicasPerShard > 1) {
log.info("Adding replicas to restored collection={}", restoreCollection);
for (Slice slice : restoreCollection.getSlices()) {
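The population of propMap in the "Mark all shards in ACTIVE STATE" block falls between the two hunks shown above, so the diff does not display it. As a hedged sketch only (class and field choices are assumptions drawn from the usual overseer state-update conventions, not from this diff), the message offered to the overseer queue would be built roughly like this:

// Hypothetical sketch, not taken from this diff: build an UPDATESHARDSTATE message that
// flips every shard of the restored collection to ACTIVE and offer it to the overseer's
// state-update queue. Uses org.apache.solr.cloud.Overseer, org.apache.solr.cloud.overseer.OverseerAction,
// and org.apache.solr.common.cloud.{Slice, ZkNodeProps, ZkStateReader}.
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollectionName);
for (Slice shard : restoreCollection.getSlices()) {
  propMap.put(shard.getName(), Slice.State.ACTIVE.toString()); // shardName -> "active"
}
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));         // overseer state-update queue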


@@ -95,9 +95,9 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
numPullReplicas = TestUtil.nextInt(random(), 0, 1);
CollectionAdminRequest.Create create = isImplicit ?
// NOTE: use shard list with same # of shards as NUM_SHARDS; we assume this later
CollectionAdminRequest.createCollectionWithImplicitRouter(getCollectionName(), "conf1", "shard1,shard2", replFactor, numTlogReplicas, numPullReplicas) :
CollectionAdminRequest.createCollection(getCollectionName(), "conf1", NUM_SHARDS, replFactor, numTlogReplicas, numPullReplicas);
// NOTE: use shard list with same # of shards as NUM_SHARDS; we assume this later
CollectionAdminRequest.createCollectionWithImplicitRouter(getCollectionName(), "conf1", "shard1,shard2", replFactor, numTlogReplicas, numPullReplicas) :
CollectionAdminRequest.createCollection(getCollectionName(), "conf1", NUM_SHARDS, replFactor, numTlogReplicas, numPullReplicas);
if (NUM_SHARDS * (replFactor + numTlogReplicas + numPullReplicas) > cluster.getJettySolrRunners().size() || random().nextBoolean()) {
create.setMaxShardsPerNode((int)Math.ceil(NUM_SHARDS * (replFactor + numTlogReplicas + numPullReplicas) / cluster.getJettySolrRunners().size()));//just to assert it survives the restoration
@@ -122,7 +122,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
CloudSolrClient solrClient = cluster.getSolrClient();
create.process(solrClient);
indexDocs(getCollectionName());
indexDocs(getCollectionName(), false);
if (doSplitShardOperation) {
// shard split the first shard
@@ -197,23 +197,29 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
return cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(collectionName).getActiveSlices().size();
}
private void indexDocs(String collectionName) throws Exception {
private int indexDocs(String collectionName, boolean useUUID) throws Exception {
Random random = new Random(docsSeed);// use a constant seed for the whole test run so that we can easily re-index.
int numDocs = random.nextInt(100);
if (numDocs == 0) {
log.info("Indexing ZERO test docs");
return;
return 0;
}
List<SolrInputDocument> docs = new ArrayList<>(numDocs);
for (int i=0; i<numDocs; i++) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", i);
doc.addField("id", ((useUUID == true) ? java.util.UUID.randomUUID().toString() : i));
doc.addField("shard_s", "shard" + (1 + random.nextInt(NUM_SHARDS))); // for implicit router
docs.add(doc);
}
CloudSolrClient client = cluster.getSolrClient();
client.add(collectionName, docs);// batch
client.add(collectionName, docs); //batch
client.commit(collectionName);
log.info("Indexed {} docs to collection: {}", numDocs, collectionName);
return numDocs;
}
private void testBackupAndRestore(String collectionName) throws Exception {
@@ -298,7 +304,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
//Re-index same docs (should be identical docs given same random seed) and test we have the same result. Helps
// test we reconstituted the hash ranges / doc router.
if (!(restoreCollection.getRouter() instanceof ImplicitDocRouter) && random().nextBoolean()) {
indexDocs(restoreCollectionName);
indexDocs(restoreCollectionName, false);
assertEquals(origShardToDocCount, getShardToDocCountMap(client, restoreCollection));
}
@@ -327,6 +333,18 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
assertEquals("Restore collection should use stateFormat=2", 2, restoreCollection.getStateFormat());
//SOLR-12065: Add more docs after restore is complete to see if they are getting added fine
//explicitly querying the leaders. If we use CloudSolrClient there is no guarantee that we'll hit a nrtReplica
{
  Map<String, Integer> restoredCollectionPerShardCount = getShardToDocCountMap(client, restoreCollection);
  long restoredCollectionDocCount = restoredCollectionPerShardCount.values().stream().mapToInt(Number::intValue).sum();
  int numberNewDocsIndexed = indexDocs(restoreCollectionName, true);
  Map<String, Integer> restoredCollectionPerShardCountAfterIndexing = getShardToDocCountMap(client, restoreCollection);
  int restoredCollectionFinalDocCount = restoredCollectionPerShardCountAfterIndexing.values().stream().mapToInt(Number::intValue).sum();
  log.info("Original doc count in restored collection:" + restoredCollectionDocCount + ", number of newly added documents to the restored collection: " + numberNewDocsIndexed + ", after indexing: " + restoredCollectionFinalDocCount);
  assertEquals((restoredCollectionDocCount + numberNewDocsIndexed), restoredCollectionFinalDocCount);
}
// assert added core properties:
// DWS: did via manual inspection.
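As the comment above the doc-count block notes, CloudSolrClient gives no guarantee of hitting an NRT replica, so the test compares per-shard document counts rather than trusting a single collection-wide query. For illustration only, a hedged sketch of that idea in plain SolrJ (the helper name and the distrib=false parameter are assumptions, not part of this test):

// Hypothetical helper, not part of this commit: count documents by querying each shard
// leader's core URL directly instead of relying on CloudSolrClient's request routing.
// Uses org.apache.solr.client.solrj.SolrQuery, org.apache.solr.client.solrj.impl.HttpSolrClient,
// and org.apache.solr.common.cloud.{DocCollection, Replica, Slice}.
private long countDocsViaLeaders(DocCollection restoreCollection) throws Exception {
  long total = 0;
  for (Slice slice : restoreCollection.getSlices()) {
    Replica leader = slice.getLeader();
    try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(leader.getCoreUrl()).build()) {
      SolrQuery query = new SolrQuery("*:*");
      query.set("distrib", "false"); // count only this core, no distributed fan-out
      total += leaderClient.query(query).getResults().getNumFound();
    }
  }
  return total;
}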