From 6c3b8b5b0283292db5a0e1924d5c37405550293f Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Thu, 26 Jan 2012 15:29:04 +0000 Subject: [PATCH] cleanup/fix logic around setting active state on startup git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1236240 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/solr/cloud/ZkController.java | 70 +++++++++---------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 80bb9e9ad09..5467b7d0a00 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -508,9 +508,33 @@ public final class ZkController { try { core = cc.getCore(desc.getName()); - boolean startRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc, + if (isLeader) { + // recover from local transaction log and wait for it to complete before + // going active + // TODO: should this be moved to another thread? To recoveryStrat? + // TODO: should this actually be done earlier, before (or as part of) + // leader election perhaps? + // TODO: ensure that a replica that is trying to recover waits until I'm + // active (or don't make me the + // leader until my local replay is done. But this replay is only needed + // on the leader - replicas + // will do recovery anyway + + UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); + if (!core.isReloaded() && ulog != null) { + Future recoveryFuture = core.getUpdateHandler() + .getUpdateLog().recoverFromLog(); + if (recoveryFuture != null) { + recoveryFuture.get(); // NOTE: this could potentially block for + // minutes or more! + // TODO: public as recovering in the mean time? + } + } + } + + boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc, collection, coreZkNodeName, shardId, leaderProps, core, cc); - if (!startRecovery) { + if (didRecovery) { publishAsActive(baseUrl, desc, coreZkNodeName, coreName); } } finally { @@ -546,46 +570,18 @@ public final class ZkController { SolrCore core, CoreContainer cc) throws InterruptedException, KeeperException, IOException, ExecutionException { - boolean doRecovery = true; - - - if (isLeader) { - doRecovery = false; - - // recover from local transaction log and wait for it to complete before - // going active - // TODO: should this be moved to another thread? To recoveryStrat? - // TODO: should this actually be done earlier, before (or as part of) - // leader election perhaps? - // TODO: ensure that a replica that is trying to recover waits until I'm - // active (or don't make me the - // leader until my local replay is done. But this replay is only needed - // on the leader - replicas - // will do recovery anyway - - UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); - if (!core.isReloaded() && ulog != null) { - Future recoveryFuture = core.getUpdateHandler() - .getUpdateLog().recoverFromLog(); - if (recoveryFuture != null) { - recoveryFuture.get(); // NOTE: this could potentially block for - // minutes or more! - // TODO: public as recovering in the mean time? - } - } - return false; - } else { + if (!isLeader) { if (core.isReloaded() && !recoverReloadedCores) { doRecovery = false; } - } - - if (doRecovery && !SKIP_AUTO_RECOVERY) { - log.info("Core needs to recover:" + core.getName()); - core.getUpdateHandler().getSolrCoreState().doRecovery(core); - return true; + + if (doRecovery && !SKIP_AUTO_RECOVERY) { + log.info("Core needs to recover:" + core.getName()); + core.getUpdateHandler().getSolrCoreState().doRecovery(core); + return true; + } } return false;