parent
a79cd77e5c
commit
250973af1d
|
@ -31,6 +31,7 @@ import org.elasticsearch.client.Client;
|
|||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.action.shard.ShardStateAction;
|
||||
import org.elasticsearch.cluster.coordination.ClusterBootstrapService;
|
||||
import org.elasticsearch.cluster.coordination.LagDetector;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.cluster.routing.Murmur3HashFunction;
|
||||
import org.elasticsearch.cluster.routing.ShardRouting;
|
||||
|
@ -388,7 +389,6 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/41047")
|
||||
public void testCannotJoinIfMasterLostDataFolder() throws Exception {
|
||||
String masterNode = internalCluster().startMasterOnlyNode();
|
||||
String dataNode = internalCluster().startDataOnlyNode();
|
||||
|
@ -401,7 +401,18 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
|
|||
|
||||
@Override
|
||||
public Settings onNodeStopped(String nodeName) {
|
||||
return Settings.builder().put(ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING.getKey(), nodeName).build();
|
||||
return Settings.builder()
|
||||
.put(ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING.getKey(), nodeName)
|
||||
/*
|
||||
* the data node might join while the master is still not fully established as master just yet and bypasses the join
|
||||
* validation that is done before adding the node to the cluster. Only the join validation when handling the publish
|
||||
* request takes place, but at this point the cluster state has been successfully committed, and will subsequently be
|
||||
* exposed to the applier. The health check below therefore sees the cluster state with the 2 nodes and thinks all is
|
||||
* good, even though the data node never accepted this state. What's worse is that it takes 90 seconds for the data
|
||||
* node to be kicked out of the cluster (lag detection). We speed this up here.
|
||||
*/
|
||||
.put(LagDetector.CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING.getKey(), "10s")
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -410,9 +421,11 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
|
|||
}
|
||||
});
|
||||
|
||||
assertFalse(internalCluster().client(masterNode).admin().cluster().prepareHealth().get().isTimedOut());
|
||||
assertTrue(internalCluster().client(masterNode).admin().cluster().prepareHealth().setWaitForNodes("2").setTimeout("2s").get()
|
||||
.isTimedOut());
|
||||
assertBusy(() -> {
|
||||
assertFalse(internalCluster().client(masterNode).admin().cluster().prepareHealth().get().isTimedOut());
|
||||
assertTrue(internalCluster().client(masterNode).admin().cluster().prepareHealth().setWaitForNodes("2").setTimeout("2s").get()
|
||||
.isTimedOut());
|
||||
}, 30, TimeUnit.SECONDS);
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(dataNode)); // otherwise we will fail during clean-up
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue