cleanup/fix logic around setting active state on startup

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1236240 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2012-01-26 15:29:04 +00:00
parent 5f9209aff2
commit 6c3b8b5b02
1 changed file with 33 additions and 37 deletions

View File

@@ -508,9 +508,33 @@ public final class ZkController {
try { try {
core = cc.getCore(desc.getName()); core = cc.getCore(desc.getName());
boolean startRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc, if (isLeader) {
// recover from local transaction log and wait for it to complete before
// going active
// TODO: should this be moved to another thread? To recoveryStrat?
// TODO: should this actually be done earlier, before (or as part of)
// leader election perhaps?
// TODO: ensure that a replica that is trying to recover waits until I'm
// active (or don't make me the
// leader until my local replay is done. But this replay is only needed
// on the leader - replicas
// will do recovery anyway
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
if (!core.isReloaded() && ulog != null) {
Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler()
.getUpdateLog().recoverFromLog();
if (recoveryFuture != null) {
recoveryFuture.get(); // NOTE: this could potentially block for
// minutes or more!
// TODO: public as recovering in the mean time?
}
}
}
boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc,
collection, coreZkNodeName, shardId, leaderProps, core, cc); collection, coreZkNodeName, shardId, leaderProps, core, cc);
if (!startRecovery) { if (didRecovery) {
publishAsActive(baseUrl, desc, coreZkNodeName, coreName); publishAsActive(baseUrl, desc, coreZkNodeName, coreName);
} }
} finally { } finally {
@@ -546,46 +570,18 @@ public final class ZkController {
SolrCore core, CoreContainer cc) throws InterruptedException, SolrCore core, CoreContainer cc) throws InterruptedException,
KeeperException, IOException, ExecutionException { KeeperException, IOException, ExecutionException {
boolean doRecovery = true; boolean doRecovery = true;
if (!isLeader) {
if (isLeader) {
doRecovery = false;
// recover from local transaction log and wait for it to complete before
// going active
// TODO: should this be moved to another thread? To recoveryStrat?
// TODO: should this actually be done earlier, before (or as part of)
// leader election perhaps?
// TODO: ensure that a replica that is trying to recover waits until I'm
// active (or don't make me the
// leader until my local replay is done. But this replay is only needed
// on the leader - replicas
// will do recovery anyway
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
if (!core.isReloaded() && ulog != null) {
Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler()
.getUpdateLog().recoverFromLog();
if (recoveryFuture != null) {
recoveryFuture.get(); // NOTE: this could potentially block for
// minutes or more!
// TODO: public as recovering in the mean time?
}
}
return false;
} else {
if (core.isReloaded() && !recoverReloadedCores) { if (core.isReloaded() && !recoverReloadedCores) {
doRecovery = false; doRecovery = false;
} }
}
if (doRecovery && !SKIP_AUTO_RECOVERY) {
if (doRecovery && !SKIP_AUTO_RECOVERY) { log.info("Core needs to recover:" + core.getName());
log.info("Core needs to recover:" + core.getName()); core.getUpdateHandler().getSolrCoreState().doRecovery(core);
core.getUpdateHandler().getSolrCoreState().doRecovery(core); return true;
return true; }
} }
return false; return false;