SOLR-5476 avoiding race condition

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1566247 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Noble Paul 2014-02-09 07:28:56 +00:00
parent 450e6204f4
commit 0a3e6856e4
4 changed files with 18 additions and 12 deletions

View File

@ -71,7 +71,7 @@ public abstract class ElectionContext {
}
}
abstract void runLeaderProcess(boolean weAreReplacement) throws KeeperException, InterruptedException, IOException;
abstract void runLeaderProcess(boolean weAreReplacement, int pauseTime) throws KeeperException, InterruptedException, IOException;
public void checkIfIamLeaderFired() {}
@ -106,7 +106,7 @@ class ShardLeaderElectionContextBase extends ElectionContext {
}
@Override
void runLeaderProcess(boolean weAreReplacement) throws KeeperException,
void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart) throws KeeperException,
InterruptedException, IOException {
zkClient.makePath(leaderPath, ZkStateReader.toJSON(leaderProps),
@ -154,7 +154,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
* weAreReplacement: has someone else been the leader already?
*/
@Override
void runLeaderProcess(boolean weAreReplacement) throws KeeperException,
void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart) throws KeeperException,
InterruptedException, IOException {
log.info("Running the leader process for shard " + shardId);
@ -270,7 +270,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
}
boolean success = false;
try {
super.runLeaderProcess(weAreReplacement);
super.runLeaderProcess(weAreReplacement, 0);
success = true;
} catch (Exception e) {
SolrException.log(log, "There was a problem trying to register as the leader", e);
@ -449,7 +449,7 @@ final class OverseerElectionContext extends ElectionContext {
}
@Override
void runLeaderProcess(boolean weAreReplacement) throws KeeperException,
void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart) throws KeeperException,
InterruptedException {
log.info("I am going to be the leader {}", id);
final String id = leaderSeqPath
@ -458,6 +458,13 @@ final class OverseerElectionContext extends ElectionContext {
zkClient.makePath(leaderPath, ZkStateReader.toJSON(myProps),
CreateMode.EPHEMERAL, true);
if(pauseBeforeStart >0){
try {
Thread.sleep(pauseBeforeStart);
} catch (InterruptedException e) {
log.warn("Wait interrupted ", e);
}
}
overseer.start(id);
}

View File

@ -161,7 +161,7 @@ public class LeaderElector {
// TODO: get this core param out of here
protected void runIamLeaderProcess(final ElectionContext context, boolean weAreReplacement) throws KeeperException,
InterruptedException, IOException {
context.runLeaderProcess(weAreReplacement);
context.runLeaderProcess(weAreReplacement,0);
}
/**

View File

@ -59,10 +59,10 @@ public class Overseer {
public static final String ADD_ROUTING_RULE = "addroutingrule";
public static final String REMOVE_ROUTING_RULE = "removeroutingrule";
private static final int STATE_UPDATE_DELAY = 1500; // delay between cloud state updates
public static final int STATE_UPDATE_DELAY = 1500; // delay between cloud state updates
private static Logger log = LoggerFactory.getLogger(Overseer.class);
static enum LeaderStatus { DONT_KNOW, NO, YES };
private long lastUpdatedTime = 0;
@ -89,7 +89,7 @@ public class Overseer {
@Override
public void run() {
LeaderStatus isLeader = amILeader();
while (isLeader == LeaderStatus.DONT_KNOW) {
log.debug("am_i_leader unclear {}", isLeader);

View File

@ -295,9 +295,8 @@ public final class ZkController {
public void forceOverSeer(){
try {
zkClient.delete("/overseer_elect/leader",-1, true);
log.info("Forcing me to be leader {} ",getBaseUrl());
overseerElector.getContext().runLeaderProcess(true);
rejoinOverseerElection();
log.info("Forcing me to be leader {} ", getBaseUrl());
overseerElector.getContext().runLeaderProcess(true, Overseer.STATE_UPDATE_DELAY+100);
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, " Error becoming overseer ",e);