mirror of https://github.com/apache/lucene.git
SOLR-5904: ElectionContext can cancel an election when it should not if there was an exception while trying to register as the leader.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1587702 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3dc2074d1b
commit
c7c9fc35a1
|
@ -94,6 +94,13 @@ Upgrading from Solr 4.8
|
|||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
* SOLR-5904: ElectionContext can cancel an election when it should not if there
|
||||
was an exception while trying to register as the leader.
|
||||
(Mark Miller, Alan Woodward)
|
||||
|
||||
Other Changes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -286,63 +286,27 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
|
|||
+ ZkCoreNodeProps.getCoreUrl(leaderProps) + " " + shardId);
|
||||
core.getCoreDescriptor().getCloudDescriptor().setLeader(true);
|
||||
}
|
||||
boolean success = false;
|
||||
|
||||
try {
|
||||
super.runLeaderProcess(weAreReplacement, 0);
|
||||
success = true;
|
||||
} catch (Exception e) {
|
||||
SolrException.log(log, "There was a problem trying to register as the leader", e);
|
||||
|
||||
try (SolrCore core = cc.getCore(coreName)) {
|
||||
|
||||
if (core == null) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Fatal Error, SolrCore not found:" + coreName + " in "
|
||||
+ cc.getCoreNames());
|
||||
log.debug("SolrCore not found:" + coreName + " in " + cc.getCoreNames());
|
||||
return;
|
||||
}
|
||||
|
||||
core.getCoreDescriptor().getCloudDescriptor().setLeader(false);
|
||||
|
||||
// we could not publish ourselves as leader - rejoin election
|
||||
// we could not publish ourselves as leader - try and rejoin election
|
||||
rejoinLeaderElection(leaderSeqPath, core);
|
||||
} finally {
|
||||
if (!success)
|
||||
cancelElection();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private boolean areAnyOtherReplicasActive(ZkController zkController,
|
||||
ZkNodeProps leaderProps, String collection, String shardId) {
|
||||
ClusterState clusterState = zkController.getZkStateReader()
|
||||
.getClusterState();
|
||||
Map<String,Slice> slices = clusterState.getSlicesMap(collection);
|
||||
Slice slice = slices.get(shardId);
|
||||
if (!slice.getState().equals(Slice.ACTIVE)) {
|
||||
//Return false if the Slice is not active yet.
|
||||
return false;
|
||||
}
|
||||
Map<String,Replica> replicasMap = slice.getReplicasMap();
|
||||
for (Map.Entry<String,Replica> shard : replicasMap.entrySet()) {
|
||||
String state = shard.getValue().getStr(ZkStateReader.STATE_PROP);
|
||||
// System.out.println("state:"
|
||||
// + state
|
||||
// + shard.getValue().get(ZkStateReader.NODE_NAME_PROP)
|
||||
// + " live: "
|
||||
// + clusterState.liveNodesContain(shard.getValue().get(
|
||||
// ZkStateReader.NODE_NAME_PROP)));
|
||||
if (state.equals(ZkStateReader.ACTIVE)
|
||||
&& clusterState.liveNodesContain(shard.getValue().getStr(
|
||||
ZkStateReader.NODE_NAME_PROP))
|
||||
&& !new ZkCoreNodeProps(shard.getValue()).getCoreUrl().equals(
|
||||
new ZkCoreNodeProps(leaderProps).getCoreUrl())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private void waitForReplicasToComeUp(boolean weAreReplacement, int timeoutms) throws InterruptedException {
|
||||
long timeoutAt = System.nanoTime() + TimeUnit.NANOSECONDS.convert(timeoutms, TimeUnit.MILLISECONDS);
|
||||
|
|
Loading…
Reference in New Issue