[Discovery] Eagerly clean the routing table of shards that exist on nodes that are not in the latestDiscoNodes list.

Only the previous master node has been removed, so only shards allocated to that node will get failed.
This would have happened anyhow later on when AllocationService#reroute is invoked (for example when a cluster setting changes or on another cluster event),
but by cleaning the routing table proactively, the stale routing table is fixed sooner and therefore the shards
that are not accessible anyhow (because the node these shards were on has left the cluster) will get re-assigned sooner.
This commit is contained in:
Martijn van Groningen 2014-05-02 12:50:30 +07:00 committed by Boaz Leskes
parent 89a50f6013
commit 2220c66535
2 changed files with 7 additions and 5 deletions

View File

@ -327,7 +327,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
// update the fact that we are the master... // update the fact that we are the master...
latestDiscoNodes = builder.build(); latestDiscoNodes = builder.build();
ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock()).build(); ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock()).build();
return ClusterState.builder(currentState).nodes(latestDiscoNodes).blocks(clusterBlocks).build(); currentState = ClusterState.builder(currentState).nodes(latestDiscoNodes).blocks(clusterBlocks).build();
// eagerly run reroute to remove dead nodes from routing table
RoutingAllocation.Result result = allocationService.reroute(currentState);
return ClusterState.builder(currentState).routingResult(result).build();
} }
@Override @Override

View File

@ -19,7 +19,6 @@
package org.elasticsearch.recovery; package org.elasticsearch.recovery;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.Slow;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequestBuilder; import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequestBuilder;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
@ -31,7 +30,7 @@ import org.elasticsearch.test.junit.annotations.TestLogging;
import org.junit.Test; import org.junit.Test;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.test.ElasticsearchIntegrationTest.*; import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
/** /**
@ -55,8 +54,7 @@ public class FullRollingRestartTests extends ElasticsearchIntegrationTest {
@Test @Test
@Slow @Slow
@TestLogging("indices.cluster:TRACE,cluster.service:TRACE") @TestLogging("indices.cluster:TRACE,cluster.service:TRACE,action.search:TRACE,indices.recovery:TRACE")
@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elasticsearch/elasticsearch/tree/feature/improve_zen")
public void testFullRollingRestart() throws Exception { public void testFullRollingRestart() throws Exception {
internalCluster().startNode(); internalCluster().startNode();
createIndex("test"); createIndex("test");