[Discovery] Eagerly clean the routing table of shards that exist on nodes that are not in the latestDiscoNodes list.

Only the previous master node has been removed, so only shards allocated to that node will get failed. This would have happened anyway later on, when AllocationService#reroute is invoked (for example when a cluster setting changes or on another cluster event), but by cleaning the routing table proactively, the stale routing table is fixed sooner, and therefore the shards that are inaccessible anyway (because the node these shards were on has left the cluster) will get reassigned sooner.
2014-05-02 12:50:30 +07:00 · 2014-05-02 12:50:30 +07:00 · 2220c66535
parent 89a50f6013
commit 2220c66535
2 changed files with 7 additions and 5 deletions
--- a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java
@ -327,7 +327,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                        // update the fact that we are the master...
                        latestDiscoNodes = builder.build();
                        ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock()).build();
-                        return ClusterState.builder(currentState).nodes(latestDiscoNodes).blocks(clusterBlocks).build();
+                        currentState = ClusterState.builder(currentState).nodes(latestDiscoNodes).blocks(clusterBlocks).build();
+
+                        // eagerly run reroute to remove dead nodes from routing table
+                        RoutingAllocation.Result result = allocationService.reroute(currentState);
+                        return ClusterState.builder(currentState).routingResult(result).build();
                    }

                    @Override
--- a/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java
+++ b/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java
@ -19,7 +19,6 @@

 package org.elasticsearch.recovery;

-import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequestBuilder;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
@ -31,7 +30,7 @@ import org.elasticsearch.test.junit.annotations.TestLogging;
 import org.junit.Test;

 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
-import static org.elasticsearch.test.ElasticsearchIntegrationTest.*;
+import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;

 /**
@ -55,8 +54,7 @@ public class FullRollingRestartTests extends ElasticsearchIntegrationTest {

    @Test
    @Slow
-    @TestLogging("indices.cluster:TRACE,cluster.service:TRACE")
-    @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elasticsearch/elasticsearch/tree/feature/improve_zen")
+    @TestLogging("indices.cluster:TRACE,cluster.service:TRACE,action.search:TRACE,indices.recovery:TRACE")
    public void testFullRollingRestart() throws Exception {
        internalCluster().startNode();
        createIndex("test");