From 1c84da10643ad53ba24564cba6f1b6110e7a1add Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Sun, 26 Feb 2012 16:25:32 +0000 Subject: [PATCH] SOLR-3080: do tlog recovery for all nodes, not just leaders git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293863 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/solr/cloud/ZkController.java | 48 +++++++++++-------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 41ae1a66149..5152c6cf702 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -273,6 +273,13 @@ public final class ZkController { return zkStateReader.getCloudState(); } + /** @return the CoreState for the core, which may not yet be visible to ZooKeeper or other nodes in the cluster */ + public CoreState getCoreState(String coreName) { + synchronized (coreStates) { + return coreStates.get(coreName); + } + } + /** * @param zkConfigName * @param fileName @@ -557,29 +564,28 @@ public final class ZkController { try { core = cc.getCore(desc.getName()); - if (isLeader) { - // recover from local transaction log and wait for it to complete before - // going active - // TODO: should this be moved to another thread? To recoveryStrat? - // TODO: should this actually be done earlier, before (or as part of) - // leader election perhaps? - // TODO: ensure that a replica that is trying to recover waits until I'm - // active (or don't make me the - // leader until my local replay is done. 
But this replay is only needed - // on the leader - replicas - // will do recovery anyway - - UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); - if (!core.isReloaded() && ulog != null) { - Future recoveryFuture = core.getUpdateHandler() - .getUpdateLog().recoverFromLog(); - if (recoveryFuture != null) { - recoveryFuture.get(); // NOTE: this could potentially block for - // minutes or more! - // TODO: public as recovering in the mean time? - } + + // recover from local transaction log and wait for it to complete before + // going active + // TODO: should this be moved to another thread? To recoveryStrat? + // TODO: should this actually be done earlier, before (or as part of) + // leader election perhaps? + // TODO: if I'm the leader, ensure that a replica that is trying to recover waits until I'm + // active (or don't make me the + // leader until my local replay is done). + + UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); + if (!core.isReloaded() && ulog != null) { + Future recoveryFuture = core.getUpdateHandler() + .getUpdateLog().recoverFromLog(); + if (recoveryFuture != null) { + recoveryFuture.get(); // NOTE: this could potentially block for + // minutes or more! + // TODO: publish as recovering in the mean time? + // TODO: in the future we could do peersync in parallel with recoverFromLog } } + boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc, collection, coreZkNodeName, shardId, leaderProps, core, cc);