SOLR-3080: do tlog recovery for all nodes, not just leaders

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293863 13f79535-47bb-0310-9956-ffa450edef68
2012-02-26 16:25:32 +00:00 · 2012-02-26 16:25:32 +00:00 · 1c84da1064
parent 9d11291c02
commit 1c84da1064
1 changed files with 27 additions and 21 deletions
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@ -273,6 +273,13 @@ public final class ZkController {
    return zkStateReader.getCloudState();
  }
  /**
   * Looks up the {@code CoreState} stored in {@code coreStates} under the given core name.
   * The lookup runs while holding the monitor of {@code coreStates}.
   *
   * @param coreName the key used for the lookup in {@code coreStates}
   * @return the CoreState for the core, which may not yet be visible to ZooKeeper or
   *         other nodes in the cluster
   */
  public CoreState getCoreState(String coreName) {
    final CoreState state;
    synchronized (coreStates) {
      // Read under the lock; the value is handed back after the monitor is released.
      state = coreStates.get(coreName);
    }
    return state;
  }
  /**
   * @param zkConfigName
   * @param fileName
@ -557,30 +564,29 @@ public final class ZkController {
      try {
        core = cc.getCore(desc.getName());
        if (isLeader) {
          // recover from local transaction log and wait for it to complete before
          // going active
          // TODO: should this be moved to another thread? To recoveryStrat?
          // TODO: should this actually be done earlier, before (or as part of)
          // leader election perhaps?
          // TODO: ensure that a replica that is trying to recover waits until I'm
          // active (or don't make me the
          // leader until my local replay is done. But this replay is only needed
          // on the leader - replicas
          // will do recovery anyway
-          UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
+        // recover from local transaction log and wait for it to complete before
-          if (!core.isReloaded() && ulog != null) {
+        // going active
-            Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler()
+        // TODO: should this be moved to another thread? To recoveryStrat?
-                .getUpdateLog().recoverFromLog();
+        // TODO: should this actually be done earlier, before (or as part of)
-            if (recoveryFuture != null) {
+        // leader election perhaps?
-              recoveryFuture.get(); // NOTE: this could potentially block for
+        // TODO: if I'm the leader, ensure that a replica that is trying to recover waits until I'm
-                                    // minutes or more!
+        // active (or don't make me the
-              // TODO: public as recovering in the mean time?
+        // leader until my local replay is done.
-            }
+
        UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
        if (!core.isReloaded() && ulog != null) {
          Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler()
              .getUpdateLog().recoverFromLog();
          if (recoveryFuture != null) {
            recoveryFuture.get(); // NOTE: this could potentially block for
            // minutes or more!
            // TODO: publish as recovering in the meantime?
            // TODO: in the future we could do peersync in parallel with recoverFromLog
          }
        }
        boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc,
            collection, coreZkNodeName, shardId, leaderProps, core, cc);
        if (!didRecovery) {