SOLR-8696: Start the Overseer before actions that need the overseer on init and when reconnecting after zk expiration and improve init logic.

This commit is contained in:
markrmiller 2016-02-23 13:59:04 -05:00
parent 7d32456efa
commit 8ac4fdd6bb
2 changed files with 17 additions and 22 deletions

View File

@ -215,6 +215,9 @@ Bug Fixes
* SOLR-8497: Merge index does not mark the Directory objects it creates as 'done' and they are retained in the * SOLR-8497: Merge index does not mark the Directory objects it creates as 'done' and they are retained in the
Directory cache. (Sivlio Sanchez, Mark Miller) Directory cache. (Sivlio Sanchez, Mark Miller)
* SOLR-8696: Start the Overseer before actions that need the overseer on init and when reconnecting after
zk expiration and improve init logic. (Scott Blum, Mark Miller)
Optimizations Optimizations
---------------------- ----------------------
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been

View File

@ -266,11 +266,8 @@ public final class ZkController {
// seems we don't need to do this again... // seems we don't need to do this again...
// Overseer.createClientNodes(zkClient, getNodeName()); // Overseer.createClientNodes(zkClient, getNodeName());
cc.cancelCoreRecoveries(); // start the overseer first as following code may need its processing
registerAllCoresAsDown(registerOnReconnect, false);
if (!zkRunOnly) { if (!zkRunOnly) {
ElectionContext context = new OverseerElectionContext(zkClient, ElectionContext context = new OverseerElectionContext(zkClient,
overseer, getNodeName()); overseer, getNodeName());
@ -284,6 +281,10 @@ public final class ZkController {
overseerElector.joinElection(context, true); overseerElector.joinElection(context, true);
} }
cc.cancelCoreRecoveries();
registerAllCoresAsDown(registerOnReconnect, false);
zkStateReader.createClusterStateWatchersAndUpdate(); zkStateReader.createClusterStateWatchersAndUpdate();
// we have to register as live first to pick up docs in the buffer // we have to register as live first to pick up docs in the buffer
@ -620,26 +621,12 @@ public final class ZkController {
private void init(CurrentCoreDescriptorProvider registerOnReconnect) { private void init(CurrentCoreDescriptorProvider registerOnReconnect) {
try { try {
boolean createdWatchesAndUpdated = false;
Stat stat = zkClient.exists(ZkStateReader.LIVE_NODES_ZKNODE, null, true);
if (stat != null && stat.getNumChildren() > 0) {
zkStateReader.createClusterStateWatchersAndUpdate();
createdWatchesAndUpdated = true;
publishAndWaitForDownStates();
}
createClusterZkNodes(zkClient); createClusterZkNodes(zkClient);
createEphemeralLiveNode(); // start the overseer first as following code may need its processing
ShardHandler shardHandler;
UpdateShardHandler updateShardHandler;
shardHandler = cc.getShardHandlerFactory().getShardHandler();
updateShardHandler = cc.getUpdateShardHandler();
if (!zkRunOnly) { if (!zkRunOnly) {
overseerElector = new LeaderElector(zkClient); overseerElector = new LeaderElector(zkClient);
this.overseer = new Overseer(shardHandler, updateShardHandler, this.overseer = new Overseer(cc.getShardHandlerFactory().getShardHandler(), cc.getUpdateShardHandler(),
CommonParams.CORES_HANDLER_PATH, zkStateReader, this, cloudConfig); CommonParams.CORES_HANDLER_PATH, zkStateReader, this, cloudConfig);
ElectionContext context = new OverseerElectionContext(zkClient, ElectionContext context = new OverseerElectionContext(zkClient,
overseer, getNodeName()); overseer, getNodeName());
@ -647,10 +634,15 @@ public final class ZkController {
overseerElector.joinElection(context, false); overseerElector.joinElection(context, false);
} }
if (!createdWatchesAndUpdated) { zkStateReader.createClusterStateWatchersAndUpdate();
Stat stat = zkClient.exists(ZkStateReader.LIVE_NODES_ZKNODE, null, true);
if (stat != null && stat.getNumChildren() > 0) {
zkStateReader.createClusterStateWatchersAndUpdate(); zkStateReader.createClusterStateWatchersAndUpdate();
publishAndWaitForDownStates();
} }
// Do this last to signal we're up.
createEphemeralLiveNode();
} catch (IOException e) { } catch (IOException e) {
log.error("", e); log.error("", e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,