SOLR-3080: do tlog recovery for all nodes, not just leaders

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1293863 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2012-02-26 16:25:32 +00:00
parent 9d11291c02
commit 1c84da1064
1 changed files with 27 additions and 21 deletions

View File

@ -273,6 +273,13 @@ public final class ZkController {
return zkStateReader.getCloudState(); return zkStateReader.getCloudState();
} }
/**
 * Looks up the locally tracked {@link CoreState} registered under the given core name.
 *
 * <p>The lookup is performed while holding the monitor of {@code coreStates}. The
 * state handed back is the local view and may not yet be visible to ZooKeeper or to
 * other nodes in the cluster.
 *
 * @param coreName name of the core whose state is requested
 * @return whatever {@code coreStates} holds for {@code coreName}
 *         (presumably {@code null} when nothing is registered - confirm against
 *         the declared type of {@code coreStates})
 */
public CoreState getCoreState(String coreName) {
  final CoreState localState;
  synchronized (coreStates) {
    localState = coreStates.get(coreName);
  }
  return localState;
}
/** /**
* @param zkConfigName * @param zkConfigName
* @param fileName * @param fileName
@ -557,30 +564,29 @@ public final class ZkController {
try { try {
core = cc.getCore(desc.getName()); core = cc.getCore(desc.getName());
if (isLeader) {
// recover from local transaction log and wait for it to complete before
// going active
// TODO: should this be moved to another thread? To recoveryStrat?
// TODO: should this actually be done earlier, before (or as part of)
// leader election perhaps?
// TODO: ensure that a replica that is trying to recover waits until I'm
// active (or don't make me the
// leader until my local replay is done. But this replay is only needed
// on the leader - replicas
// will do recovery anyway
UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); // recover from local transaction log and wait for it to complete before
if (!core.isReloaded() && ulog != null) { // going active
Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler() // TODO: should this be moved to another thread? To recoveryStrat?
.getUpdateLog().recoverFromLog(); // TODO: should this actually be done earlier, before (or as part of)
if (recoveryFuture != null) { // leader election perhaps?
recoveryFuture.get(); // NOTE: this could potentially block for // TODO: if I'm the leader, ensure that a replica that is trying to recover waits until I'm
// minutes or more! // active (or don't make me the
// TODO: public as recovering in the mean time? // leader until my local replay is done.
}
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
if (!core.isReloaded() && ulog != null) {
Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler()
.getUpdateLog().recoverFromLog();
if (recoveryFuture != null) {
recoveryFuture.get(); // NOTE: this could potentially block for
// minutes or more!
// TODO: publish as recovering in the mean time?
// TODO: in the future we could do PeerSync in parallel with recoverFromLog
} }
} }
boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc, boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc,
collection, coreZkNodeName, shardId, leaderProps, core, cc); collection, coreZkNodeName, shardId, leaderProps, core, cc);
if (!didRecovery) { if (!didRecovery) {