SOLR-7245: Temporary ZK election or connection loss should not stall indexing due to LIR

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1668274 13f79535-47bb-0310-9956-ffa450edef68
Ramkumar Aiyengar 2015-03-21 14:13:17 +00:00
parent 54ba78e4de
commit f15330c32d
9 changed files with 62 additions and 43 deletions
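At a glance, the patch reworks ZkController.ensureReplicaInLeaderInitiatedRecovery so that callers can opt out of blocking ZooKeeper retries. A sketch of the new shape, assembled from the hunks below (parameter names are exactly those in the diff):

// New signature: leaderCoreNodeName moves ahead of the boolean flags, and a
// retryOnConnLoss flag is added alongside forcePublishState.
public boolean ensureReplicaInLeaderInitiatedRecovery(
    final String collection, final String shardId, final ZkCoreNodeProps replicaCoreProps,
    String leaderCoreNodeName, boolean forcePublishState, boolean retryOnConnLoss)
    throws KeeperException, InterruptedException;

Background callers (leader election, the LIR thread) pass retryOnConnLoss=true and keep the old blocking behavior; the indexing path in DistributedUpdateProcessor passes false, so a transient connection loss fails fast instead of stalling updates.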

solr/CHANGES.txt

@@ -195,6 +195,8 @@ New Features
   the JSON request.
   (yonik)
 
+* SOLR-7245: Temporary ZK election or connection loss should not stall indexing
+  due to leader initiated recovery (Ramkumar Aiyengar)
 
 Bug Fixes
 ----------------------

solr/core/src/java/org/apache/solr/cloud/ElectionContext.java

@@ -402,8 +402,9 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
             120,
             coreNodeName);
         zkController.ensureReplicaInLeaderInitiatedRecovery(
-            collection, shardId, coreNodeProps, false, coreNodeName);
+            collection, shardId, coreNodeProps, coreNodeName,
+            false /* forcePublishState */, true /* retryOnConnLoss */);
 
         ExecutorService executor = cc.getUpdateShardHandler().getUpdateExecutor();
         executor.execute(lirThread);
       }

solr/core/src/java/org/apache/solr/cloud/LeaderInitiatedRecoveryThread.java

@@ -226,8 +226,10 @@ public class LeaderInitiatedRecoveryThread extends Thread {
           // so its state cannot be trusted and it needs to be told to recover again ... and we keep looping here
           log.warn("Replica core={} coreNodeName={} set to active but the leader thinks it should be in recovery;"
               + " forcing it back to down state to re-run the leader-initiated recovery process; props: "+replicaProps.get(0), coreNeedingRecovery, replicaCoreNodeName);
           // force republish state to "down"
-          zkController.ensureReplicaInLeaderInitiatedRecovery(collection, shardId, nodeProps, true, leaderCoreNodeName);
+          zkController.ensureReplicaInLeaderInitiatedRecovery(
+              collection, shardId, nodeProps, leaderCoreNodeName,
+              true /* forcePublishState */, true /* retryOnConnLoss */
+          );
         }
       }
       break;
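For reference, the three call sites end up with these flag combinations (read off the hunks in this commit):

call site                       forcePublishState   retryOnConnLoss
ShardLeaderElectionContext      false               true
LeaderInitiatedRecoveryThread   true                true
DistributedUpdateProcessor      false               false

The LIR thread is the only caller that forces a republish: it has just seen a replica report itself active while the leader still considers it out of sync, so the state must be pushed back to "down".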

solr/core/src/java/org/apache/solr/cloud/ZkController.java

@@ -1130,7 +1130,7 @@ public final class ZkController {
       if (ZkStateReader.ACTIVE.equals(state)) {
         // trying to become active, so leader-initiated state must be recovering
         if (ZkStateReader.RECOVERING.equals(lirState)) {
-          updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, ZkStateReader.ACTIVE, null);
+          updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, ZkStateReader.ACTIVE, null, true);
         } else if (ZkStateReader.DOWN.equals(lirState)) {
           throw new SolrException(ErrorCode.INVALID_STATE,
               "Cannot publish state of core '"+cd.getName()+"' as active without recovering first!");
@@ -1138,7 +1138,7 @@ public final class ZkController {
       } else if (ZkStateReader.RECOVERING.equals(state)) {
         // if it is currently DOWN, then trying to enter into recovering state is good
         if (ZkStateReader.DOWN.equals(lirState)) {
-          updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, ZkStateReader.RECOVERING, null);
+          updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, ZkStateReader.RECOVERING, null, true);
         }
       }
     }
@@ -1882,8 +1882,9 @@ public final class ZkController {
    * false means the node is not live either, so no point in trying to send recovery commands
    * to it.
    */
-  public boolean ensureReplicaInLeaderInitiatedRecovery(final String collection,
-      final String shardId, final ZkCoreNodeProps replicaCoreProps, boolean forcePublishState, String leaderCoreNodeName)
+  public boolean ensureReplicaInLeaderInitiatedRecovery(
+      final String collection, final String shardId, final ZkCoreNodeProps replicaCoreProps,
+      String leaderCoreNodeName, boolean forcePublishState, boolean retryOnConnLoss)
       throws KeeperException, InterruptedException
   {
     final String replicaUrl = replicaCoreProps.getCoreUrl();
@@ -1893,10 +1894,10 @@ public final class ZkController {
     if (shardId == null)
       throw new IllegalArgumentException("shard parameter cannot be null for starting leader-initiated recovery for replica: "+replicaUrl);
     if (replicaUrl == null)
       throw new IllegalArgumentException("replicaUrl parameter cannot be null for starting leader-initiated recovery");
 
     // First, determine if this replica is already in recovery handling
     // which is needed because there can be many concurrent errors flooding in
     // about the same replica having trouble and we only need to send the "needs"
@@ -1918,7 +1919,7 @@ public final class ZkController {
       // we only really need to try to send the recovery command if the node itself is "live"
       if (getZkStateReader().getClusterState().liveNodesContain(replicaNodeName)) {
         // create a znode that requires the replica needs to "ack" to verify it knows it was out-of-sync
-        updateLeaderInitiatedRecoveryState(collection, shardId, replicaCoreNodeName, ZkStateReader.DOWN, leaderCoreNodeName);
+        updateLeaderInitiatedRecoveryState(collection, shardId, replicaCoreNodeName, ZkStateReader.DOWN, leaderCoreNodeName, retryOnConnLoss);
         replicasInLeaderInitiatedRecovery.put(replicaUrl,
             getLeaderInitiatedRecoveryZnodePath(collection, shardId, replicaCoreNodeName));
         log.info("Put replica core={} coreNodeName={} on "+
@@ -2015,7 +2016,7 @@ public final class ZkController {
   }
 
   private void updateLeaderInitiatedRecoveryState(String collection, String shardId, String coreNodeName, String state,
-      String leaderCoreNodeName) {
+      String leaderCoreNodeName, boolean retryOnConnLoss) {
     if (collection == null || shardId == null || coreNodeName == null) {
       log.warn("Cannot set leader-initiated recovery state znode to "+state+" using: collection="+collection+
           "; shardId="+shardId+"; coreNodeName="+coreNodeName);

solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java

@@ -52,7 +52,6 @@ import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.handler.component.RealTimeGetComponent;
 import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.update.AddUpdateCommand;
@@ -147,7 +146,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
     this.nodeErrorTracker = new HashMap<>(5);
     this.otherLeaderRf = new HashMap<>();
   }
 
   // gives the replication factor that was achieved for this request
   public int getAchievedRf() {
     // look across all shards to find the minimum achieved replication
@@ -286,7 +285,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
     returnVersions = req.getParams().getBool(UpdateParams.VERSIONS ,false);
 
     // TODO: better way to get the response, or pass back info to it?
-    SolrRequestInfo reqInfo = returnVersions ? SolrRequestInfo.getRequestInfo() : null;
+    // SolrRequestInfo reqInfo = returnVersions ? SolrRequestInfo.getRequestInfo() : null;
 
     this.req = req;
@@ -847,11 +846,19 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
     // before we go setting other replicas to down, make sure we're still the leader!
     String leaderCoreNodeName = null;
+    Exception getLeaderExc = null;
     try {
-      leaderCoreNodeName = zkController.getZkStateReader().getLeaderRetry(collection, shardId).getName();
+      Replica leader = zkController.getZkStateReader().getLeader(collection, shardId);
+      if (leader != null) {
+        leaderCoreNodeName = leader.getName();
+      }
     } catch (Exception exc) {
-      log.error("Failed to determine if " + cloudDesc.getCoreNodeName() + " is still the leader for " + collection +
-          " " + shardId + " before putting " + replicaUrl + " into leader-initiated recovery due to: " + exc);
+      getLeaderExc = exc;
     }
+    if (leaderCoreNodeName == null) {
+      log.warn("Failed to determine if {} is still the leader for collection={} shardId={} " +
+          "before putting {} into leader-initiated recovery",
+          cloudDesc.getCoreNodeName(), collection, shardId, replicaUrl, getLeaderExc);
+    }
 
     List<ZkCoreNodeProps> myReplicas = zkController.getZkStateReader().getReplicaProps(collection,
@@ -873,8 +880,10 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
           zkController.ensureReplicaInLeaderInitiatedRecovery(collection,
               shardId,
               stdNode.getNodeProps(),
-              false,
-              leaderCoreNodeName);
+              leaderCoreNodeName,
+              false /* forcePublishState */,
+              false /* retryOnConnLoss */
+          );
 
       // we want to try more than once, ~10 minutes
       if (sendRecoveryCommand) {
@@ -909,7 +918,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
         continue; // the replica is already in recovery handling or is not live
 
       Throwable rootCause = SolrException.getRootCause(error.e);
-      log.error("Setting up to try to start recovery on replica " + replicaUrl + " after: " + rootCause);
+      log.error("Setting up to try to start recovery on replica {}", replicaUrl, rootCause);
 
       // try to send the recovery command to the downed replica in a background thread
       CoreContainer coreContainer = req.getCore().getCoreDescriptor().getCoreContainer();
@@ -1591,7 +1600,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
     if (!zkEnabled || req.getParams().getBool(COMMIT_END_POINT, false) || singleLeader) {
       doLocalCommit(cmd);
-    } else if (zkEnabled) {
+    } else {
       ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams()));
       if (!req.getParams().getBool(COMMIT_END_POINT, false)) {
         params.set(COMMIT_END_POINT, true);
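Two things change on the indexing path: the leader check uses the new non-blocking ZkStateReader.getLeader instead of getLeaderRetry (so an update thread no longer sleeps through an election just to log a warning), and the LIR call passes retryOnConnLoss=false. A sketch of the caller-side effect of the latter; the catch clause is illustrative, not part of the patch:

try {
  zkController.ensureReplicaInLeaderInitiatedRecovery(collection, shardId,
      stdNode.getNodeProps(), leaderCoreNodeName,
      false /* forcePublishState */, false /* retryOnConnLoss */);
} catch (KeeperException.ConnectionLossException e) {
  // fail fast: the update request is not held hostage to ZK reconnection;
  // LIR can be attempted again once the session is back
}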

solr/core/src/test/org/apache/solr/cloud/BasicZkTest.java

@@ -25,7 +25,6 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
-import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -106,7 +105,10 @@ public class BasicZkTest extends AbstractZkTestCase {
     int zkPort = zkServer.getPort();
     zkServer.shutdown();
 
+    // document indexing shouldn't stop immediately after a ZK disconnect
+    assertU(adoc("id", "201"));
+
     Thread.sleep(300);
 
     // try a reconnect from disconnect
@@ -174,9 +176,4 @@ public class BasicZkTest extends AbstractZkTestCase {
     req.setParams(params);
     return req;
   }
-
-  @AfterClass
-  public static void afterClass() {
-  }
-
 }

solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java

@@ -17,7 +17,6 @@ package org.apache.solr.cloud;
  * limitations under the License.
  */
 
 import org.apache.http.NoHttpResponseException;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.JSONTestUtil;
 import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
@@ -26,7 +25,6 @@ import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
@@ -146,7 +144,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
     String replicaUrl = replicaCoreNodeProps.getCoreUrl();
 
     assertTrue(!zkController.isReplicaInRecoveryHandling(replicaUrl));
-    assertTrue(zkController.ensureReplicaInLeaderInitiatedRecovery(testCollectionName, shardId, replicaCoreNodeProps, false, leader.getName()));
+    assertTrue(zkController.ensureReplicaInLeaderInitiatedRecovery(testCollectionName, shardId, replicaCoreNodeProps, leader.getName(), false, true));
     assertTrue(zkController.isReplicaInRecoveryHandling(replicaUrl));
     Map<String,Object> lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
     assertNotNull(lirStateMap);

solr/core/src/test/org/apache/solr/cloud/ZkControllerTest.java

@@ -278,7 +278,7 @@ public class ZkControllerTest extends SolrTestCaseJ4 {
       try {
         // this method doesn't throw exception when node isn't leader
         zkController.ensureReplicaInLeaderInitiatedRecovery("c1", "shard1",
-            new ZkCoreNodeProps(replica), false, "non_existent_leader");
+            new ZkCoreNodeProps(replica), "non_existent_leader", false, false);
         fail("ZkController should not write LIR state for node which is not leader");
       } catch (Exception e) {
         assertNull("ZkController should not write LIR state for node which is not leader",

solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java

@@ -102,6 +102,8 @@ public class ZkStateReader implements Closeable {
   protected volatile ClusterState clusterState;
 
   private static final long SOLRCLOUD_UPDATE_DELAY = Long.parseLong(System.getProperty("solrcloud.update.delay", "5000"));
+  private static final int GET_LEADER_RETRY_INTERVAL_MS = 50;
+  private static final int GET_LEADER_RETRY_DEFAULT_TIMEOUT = 4000;
 
   public static final String LEADER_ELECT_ZKNODE = "leader_elect";
@@ -642,12 +644,22 @@
           shard, timeout));
     return props.getCoreUrl();
   }
 
+  public Replica getLeader(String collection, String shard) throws InterruptedException {
+    if (clusterState != null) {
+      Replica replica = clusterState.getLeader(collection, shard);
+      if (replica != null && getClusterState().liveNodesContain(replica.getNodeName())) {
+        return replica;
+      }
+    }
+    return null;
+  }
+
   /**
    * Get shard leader properties, with retry if none exist.
    */
   public Replica getLeaderRetry(String collection, String shard) throws InterruptedException {
-    return getLeaderRetry(collection, shard, 4000);
+    return getLeaderRetry(collection, shard, GET_LEADER_RETRY_DEFAULT_TIMEOUT);
   }
 
   /**
@@ -655,14 +667,11 @@
    */
   public Replica getLeaderRetry(String collection, String shard, int timeout) throws InterruptedException {
     long timeoutAt = System.nanoTime() + TimeUnit.NANOSECONDS.convert(timeout, TimeUnit.MILLISECONDS);
-    while (System.nanoTime() < timeoutAt && !closed) {
-      if (clusterState != null) {
-        Replica replica = clusterState.getLeader(collection, shard);
-        if (replica != null && getClusterState().liveNodesContain(replica.getNodeName())) {
-          return replica;
-        }
-      }
-      Thread.sleep(50);
+    while (true) {
+      Replica leader = getLeader(collection, shard);
+      if (leader != null) return leader;
+      if (System.nanoTime() >= timeoutAt || closed) break;
+      Thread.sleep(GET_LEADER_RETRY_INTERVAL_MS);
     }
     throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "No registered leader was found after waiting for "
         + timeout + "ms " + ", collection: " + collection + " slice: " + shard);
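getLeaderRetry is now a thin polling loop over the new non-blocking getLeader, with the magic numbers pulled into named constants. Usage-wise (a sketch; the collection and shard names are placeholders):

ZkStateReader reader = zkController.getZkStateReader();

// Non-blocking: a single read of the cached cluster state; returns null when
// there is no live leader (e.g. while an election is in flight).
Replica leader = reader.getLeader("collection1", "shard1");

// Blocking: polls getLeader() every GET_LEADER_RETRY_INTERVAL_MS (50ms) for up
// to GET_LEADER_RETRY_DEFAULT_TIMEOUT (4000ms), then throws SERVICE_UNAVAILABLE.
Replica leader2 = reader.getLeaderRetry("collection1", "shard1");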