Replace internal usages of 'master' term in 'server/src/internalClusterTest' directory (#2521)
Signed-off-by: Tianli Feng <ftianli@amazon.com>
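A minimal before/after sketch of the rename this commit applies across the integration tests (the calls shown are taken from the diff below; the standalone snippet itself is illustrative only, not part of the commit):

    // Before: node-role filter and disruption helper use the deprecated 'master' term
    final int nodeCount = client().admin().cluster().prepareNodesInfo("data:true", "master:true").get().getNodes().size();
    final NetworkDisruption partition = isolateMasterDisruption(NetworkDisruption.DISCONNECT);

    // After: the same calls using the 'cluster_manager' / cluster-manager terminology
    final int nodeCount = client().admin().cluster().prepareNodesInfo("data:true", "cluster_manager:true").get().getNodes().size();
    final NetworkDisruption partition = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);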
parent 704600871c
commit 507f8ccdbd
@@ -91,7 +91,7 @@ public class PendingTasksBlocksIT extends OpenSearchIntegTestCase {
 }
 // restart the cluster but prevent it from performing state recovery
-final int nodeCount = client().admin().cluster().prepareNodesInfo("data:true", "master:true").get().getNodes().size();
+final int nodeCount = client().admin().cluster().prepareNodesInfo("data:true", "cluster_manager:true").get().getNodes().size();
 internalCluster().fullRestart(new InternalTestCluster.RestartCallback() {
 @Override
 public Settings onNodeStopped(String nodeName) {
@@ -107,7 +107,7 @@ public class PendingTasksBlocksIT extends OpenSearchIntegTestCase {
 assertNotNull(client().admin().cluster().preparePendingClusterTasks().get().getPendingTasks());
 // starting one more node allows the cluster to recover
-internalCluster().startDataOnlyNode(); // cannot update minimum_master_nodes before the cluster has formed
+internalCluster().startDataOnlyNode(); // cannot update minimum_cluster_manager_nodes before the cluster has formed
 ensureGreen();
 }
@@ -48,7 +48,7 @@ import java.util.concurrent.CyclicBarrier;
 import static org.hamcrest.Matchers.equalTo;
 @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
-public class IndexingMasterFailoverIT extends OpenSearchIntegTestCase {
+public class IndexingClusterManagerFailoverIT extends OpenSearchIntegTestCase {
 @Override
 protected Collection<Class<? extends Plugin>> nodePlugins() {
@@ -58,12 +58,12 @@ public class IndexingMasterFailoverIT extends OpenSearchIntegTestCase {
 }
 /**
-* Indexing operations which entail mapping changes require a blocking request to the master node to update the mapping.
-* If the master node is being disrupted or if it cannot commit cluster state changes, it needs to retry within timeout limits.
-* This retry logic is implemented in TransportMasterNodeAction and tested by the following master failover scenario.
+* Indexing operations which entail mapping changes require a blocking request to the cluster-manager node to update the mapping.
+* If the cluster-manager node is being disrupted or if it cannot commit cluster state changes, it needs to retry within timeout limits.
+* This retry logic is implemented in TransportMasterNodeAction and tested by the following cluster-manager failover scenario.
 */
-public void testMasterFailoverDuringIndexingWithMappingChanges() throws Throwable {
-logger.info("--> start 4 nodes, 3 master, 1 data");
+public void testClusterManagerFailoverDuringIndexingWithMappingChanges() throws Throwable {
+logger.info("--> start 4 nodes, 3 cluster-manager, 1 data");
 internalCluster().setBootstrapClusterManagerNodeIndex(2);
@@ -74,7 +74,7 @@ public class IndexingMasterFailoverIT extends OpenSearchIntegTestCase {
 logger.info("--> wait for all nodes to join the cluster");
 ensureStableCluster(4);
-// We index data with mapping changes into cluster and have master failover at same time
+// We index data with mapping changes into cluster and have cluster-manager failover at same time
 client().admin()
 .indices()
 .prepareCreate("myindex")
@@ -108,14 +108,14 @@ public class IndexingMasterFailoverIT extends OpenSearchIntegTestCase {
 barrier.await();
-// interrupt communication between master and other nodes in cluster
-NetworkDisruption partition = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
+// interrupt communication between cluster-manager and other nodes in cluster
+NetworkDisruption partition = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
 internalCluster().setDisruptionScheme(partition);
 logger.info("--> disrupting network");
 partition.startDisrupting();
-logger.info("--> waiting for new master to be elected");
+logger.info("--> waiting for new cluster-manager to be elected");
 ensureStableCluster(3, dataNode);
 partition.stopDisrupting();
@@ -60,7 +60,7 @@ public class ClusterHealthIT extends OpenSearchIntegTestCase {
 public void testSimpleLocalHealth() {
 createIndex("test");
-ensureGreen(); // master should think it's green now.
+ensureGreen(); // cluster-manager should think it's green now.
 for (final String node : internalCluster().getNodeNames()) {
 // a very high time out, which should never fire due to the local flag
@@ -336,7 +336,7 @@ public class ClusterHealthIT extends OpenSearchIntegTestCase {
 assertFalse(client().admin().cluster().prepareHealth("index").setWaitForGreenStatus().get().isTimedOut());
 // at this point the original health response should not have returned: there was never a point where the index was green AND
-// the master had processed all pending tasks above LANGUID priority.
+// the cluster-manager had processed all pending tasks above LANGUID priority.
 assertFalse(healthResponseFuture.isDone());
 keepSubmittingTasks.set(false);
 assertFalse(healthResponseFuture.actionGet(TimeValue.timeValueSeconds(30)).isTimedOut());
@@ -346,14 +346,14 @@ public class ClusterHealthIT extends OpenSearchIntegTestCase {
 }
 }
-public void testHealthOnMasterFailover() throws Exception {
+public void testHealthOnClusterManagerFailover() throws Exception {
 final String node = internalCluster().startDataOnlyNode();
 final boolean withIndex = randomBoolean();
 if (withIndex) {
-// Create index with many shards to provoke the health request to wait (for green) while master is being shut down.
-// Notice that this is set to 0 after the test completed starting a number of health requests and master restarts.
+// Create index with many shards to provoke the health request to wait (for green) while cluster-manager is being shut down.
+// Notice that this is set to 0 after the test completed starting a number of health requests and cluster-manager restarts.
 // This ensures that the cluster is yellow when the health request is made, making the health request wait on the observer,
-// triggering a call to observer.onClusterServiceClose when master is shutdown.
+// triggering a call to observer.onClusterServiceClose when cluster-manager is shutdown.
 createIndex(
 "test",
 Settings.builder()
@@ -364,8 +364,8 @@ public class ClusterHealthIT extends OpenSearchIntegTestCase {
 );
 }
 final List<ActionFuture<ClusterHealthResponse>> responseFutures = new ArrayList<>();
-// Run a few health requests concurrent to master fail-overs against a data-node to make sure master failover is handled
-// without exceptions
+// Run a few health requests concurrent to cluster-manager fail-overs against a data-node
+// to make sure cluster-manager failover is handled without exceptions
 final int iterations = withIndex ? 10 : 20;
 for (int i = 0; i < iterations; ++i) {
 responseFutures.add(
@@ -394,7 +394,7 @@ public class ClusterHealthIT extends OpenSearchIntegTestCase {
 }
 }
-public void testWaitForEventsTimesOutIfMasterBusy() {
+public void testWaitForEventsTimesOutIfClusterManagerBusy() {
 final AtomicBoolean keepSubmittingTasks = new AtomicBoolean(true);
 final ClusterService clusterService = internalCluster().getInstance(ClusterService.class, internalCluster().getMasterName());
 final PlainActionFuture<Void> completionFuture = new PlainActionFuture<>();
@@ -166,7 +166,7 @@ public class ClusterInfoServiceIT extends OpenSearchIntegTestCase {
 }
 ensureGreen(indexName);
 InternalTestCluster internalTestCluster = internalCluster();
-// Get the cluster info service on the master node
+// Get the cluster info service on the cluster-manager node
 final InternalClusterInfoService infoService = (InternalClusterInfoService) internalTestCluster.getInstance(
 ClusterInfoService.class,
 internalTestCluster.getMasterName()
@@ -93,7 +93,7 @@ import static org.hamcrest.Matchers.is;
 public class ClusterStateDiffIT extends OpenSearchIntegTestCase {
 public void testClusterStateDiffSerialization() throws Exception {
 NamedWriteableRegistry namedWriteableRegistry = new NamedWriteableRegistry(ClusterModule.getNamedWriteables());
-DiscoveryNode clusterManagerNode = randomNode("master");
+DiscoveryNode clusterManagerNode = randomNode("cluster-manager");
 DiscoveryNode otherNode = randomNode("other");
 DiscoveryNodes discoveryNodes = DiscoveryNodes.builder()
 .add(clusterManagerNode)
@@ -74,7 +74,7 @@ import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
 @ClusterScope(scope = Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
-public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
+public class MinimumClusterManagerNodesIT extends OpenSearchIntegTestCase {
 @Override
 protected Collection<Class<? extends Plugin>> nodePlugins() {
@@ -83,7 +83,7 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 return classes;
 }
-public void testTwoNodesNoMasterBlock() throws Exception {
+public void testTwoNodesNoClusterManagerBlock() throws Exception {
 internalCluster().setBootstrapClusterManagerNodeIndex(1);
 Settings settings = Settings.builder().put("discovery.initial_state_timeout", "500ms").build();
@@ -151,13 +151,13 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 );
 }
-String masterNode = internalCluster().getMasterName();
-String otherNode = node1Name.equals(masterNode) ? node2Name : node1Name;
-logger.info("--> add voting config exclusion for non-master node, to be sure it's not elected");
+String clusterManagerNode = internalCluster().getMasterName();
+String otherNode = node1Name.equals(clusterManagerNode) ? node2Name : node1Name;
+logger.info("--> add voting config exclusion for non-cluster-manager node, to be sure it's not elected");
 client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(otherNode)).get();
-logger.info("--> stop master node, no cluster-manager block should appear");
-Settings masterDataPathSettings = internalCluster().dataPathSettings(masterNode);
-internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNode));
+logger.info("--> stop cluster-manager node, no cluster-manager block should appear");
+Settings clusterManagerDataPathSettings = internalCluster().dataPathSettings(clusterManagerNode);
+internalCluster().stopRandomNode(InternalTestCluster.nameFilter(clusterManagerNode));
 assertBusy(() -> {
 ClusterState clusterState = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
@@ -170,8 +170,8 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 assertThat(state.nodes().getSize(), equalTo(2));
 assertThat(state.nodes().getMasterNode(), equalTo(null));
-logger.info("--> starting the previous master node again...");
-node2Name = internalCluster().startNode(Settings.builder().put(settings).put(masterDataPathSettings).build());
+logger.info("--> starting the previous cluster-manager node again...");
+node2Name = internalCluster().startNode(Settings.builder().put(settings).put(clusterManagerDataPathSettings).build());
 clusterHealthResponse = client().admin()
 .cluster()
@@ -204,11 +204,11 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 clearRequest.setWaitForRemoval(false);
 client().execute(ClearVotingConfigExclusionsAction.INSTANCE, clearRequest).get();
-masterNode = internalCluster().getMasterName();
-otherNode = node1Name.equals(masterNode) ? node2Name : node1Name;
-logger.info("--> add voting config exclusion for master node, to be sure it's not elected");
-client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(masterNode)).get();
-logger.info("--> stop non-master node, no cluster-manager block should appear");
+clusterManagerNode = internalCluster().getMasterName();
+otherNode = node1Name.equals(clusterManagerNode) ? node2Name : node1Name;
+logger.info("--> add voting config exclusion for cluster-manager node, to be sure it's not elected");
+client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(clusterManagerNode)).get();
+logger.info("--> stop non-cluster-manager node, no cluster-manager block should appear");
 Settings otherNodeDataPathSettings = internalCluster().dataPathSettings(otherNode);
 internalCluster().stopRandomNode(InternalTestCluster.nameFilter(otherNode));
@@ -217,7 +217,7 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 assertThat(state1.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID), equalTo(true));
 });
-logger.info("--> starting the previous master node again...");
+logger.info("--> starting the previous cluster-manager node again...");
 internalCluster().startNode(Settings.builder().put(settings).put(otherNodeDataPathSettings).build());
 ensureGreen();
@@ -249,7 +249,7 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 }
 }
-public void testThreeNodesNoMasterBlock() throws Exception {
+public void testThreeNodesNoClusterManagerBlock() throws Exception {
 internalCluster().setBootstrapClusterManagerNodeIndex(2);
 Settings settings = Settings.builder().put("discovery.initial_state_timeout", "500ms").build();
@@ -312,8 +312,8 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 List<String> nonClusterManagerNodes = new ArrayList<>(
 Sets.difference(Sets.newHashSet(internalCluster().getNodeNames()), Collections.singleton(internalCluster().getMasterName()))
 );
-Settings nonMasterDataPathSettings1 = internalCluster().dataPathSettings(nonClusterManagerNodes.get(0));
-Settings nonMasterDataPathSettings2 = internalCluster().dataPathSettings(nonClusterManagerNodes.get(1));
+Settings nonClusterManagerDataPathSettings1 = internalCluster().dataPathSettings(nonClusterManagerNodes.get(0));
+Settings nonClusterManagerDataPathSettings2 = internalCluster().dataPathSettings(nonClusterManagerNodes.get(1));
 internalCluster().stopRandomNonMasterNode();
 internalCluster().stopRandomNonMasterNode();
@@ -325,7 +325,7 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 });
 logger.info("--> start back the 2 nodes ");
-internalCluster().startNodes(nonMasterDataPathSettings1, nonMasterDataPathSettings2);
+internalCluster().startNodes(nonClusterManagerDataPathSettings1, nonClusterManagerDataPathSettings2);
 internalCluster().validateClusterFormed();
 ensureGreen();
@@ -347,17 +347,17 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 internalCluster().startNodes(3, settings);
 ensureStableCluster(3);
-final String master = internalCluster().getMasterName();
+final String clusterManager = internalCluster().getMasterName();
 Set<String> otherNodes = new HashSet<>(Arrays.asList(internalCluster().getNodeNames()));
-otherNodes.remove(master);
-NetworkDisruption partition = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
+otherNodes.remove(clusterManager);
+NetworkDisruption partition = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
 internalCluster().setDisruptionScheme(partition);
 final CountDownLatch latch = new CountDownLatch(1);
 final AtomicReference<Exception> failure = new AtomicReference<>();
 logger.debug("--> submitting for cluster state to be rejected");
-final ClusterService masterClusterService = internalCluster().clusterService(master);
-masterClusterService.submitStateUpdateTask("test", new ClusterStateUpdateTask() {
+final ClusterService clusterManagerClusterService = internalCluster().clusterService(clusterManager);
+clusterManagerClusterService.submitStateUpdateTask("test", new ClusterStateUpdateTask() {
 @Override
 public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
 latch.countDown();
@@ -387,11 +387,11 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 assertThat(failure.get(), instanceOf(FailedToCommitClusterStateException.class));
 logger.debug("--> check that there is no cluster-manager in minor partition");
-assertBusy(() -> assertThat(masterClusterService.state().nodes().getMasterNode(), nullValue()));
+assertBusy(() -> assertThat(clusterManagerClusterService.state().nodes().getMasterNode(), nullValue()));
-// let major partition to elect new master, to ensure that old master is not elected once partition is restored,
-// otherwise persistent setting (which is a part of accepted state on old master) will be propagated to other nodes
-logger.debug("--> wait for master to be elected in major partition");
+// let major partition to elect new cluster-manager, to ensure that old cluster-manager is not elected once partition is restored,
+// otherwise persistent setting (which is a part of accepted state on old cluster-manager) will be propagated to other nodes
+logger.debug("--> wait for cluster-manager to be elected in major partition");
 assertBusy(() -> {
 DiscoveryNode clusterManagerNode = internalCluster().client(randomFrom(otherNodes))
 .admin()
|
@ -403,7 +403,7 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
|
|||
.nodes()
|
||||
.getMasterNode();
|
||||
assertThat(clusterManagerNode, notNullValue());
|
||||
assertThat(clusterManagerNode.getName(), not(equalTo(master)));
|
||||
assertThat(clusterManagerNode.getName(), not(equalTo(clusterManager)));
|
||||
});
|
||||
|
||||
partition.stopDisrupting();
|
||||
|
@@ -414,7 +414,7 @@ public class MinimumMasterNodesIT extends OpenSearchIntegTestCase {
 for (String node : internalCluster().getNodeNames()) {
 Settings nodeSetting = internalCluster().clusterService(node).state().metadata().settings();
 assertThat(
-node + " processed the cluster state despite of a min master node violation",
+node + " processed the cluster state despite of a min cluster-manager node violation",
 nodeSetting.get("_SHOULD_NOT_BE_THERE_"),
 nullValue()
 );
@@ -75,7 +75,7 @@ import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 @ClusterScope(scope = Scope.TEST, numDataNodes = 0)
-public class NoMasterNodeIT extends OpenSearchIntegTestCase {
+public class NoClusterManagerNodeIT extends OpenSearchIntegTestCase {
 @Override
 protected int numberOfReplicas() {
@@ -87,7 +87,7 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 return Collections.singletonList(MockTransportService.TestPlugin.class);
 }
-public void testNoMasterActions() throws Exception {
+public void testNoClusterManagerActions() throws Exception {
 Settings settings = Settings.builder()
 .put(AutoCreateIndex.AUTO_CREATE_INDEX_SETTING.getKey(), true)
 .put(NoMasterBlockService.NO_CLUSTER_MANAGER_BLOCK_SETTING.getKey(), "all")
@@ -107,57 +107,63 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 internalCluster().setDisruptionScheme(disruptionScheme);
 disruptionScheme.startDisrupting();
-final Client clientToMasterlessNode = client();
+final Client clientToClusterManagerlessNode = client();
 assertBusy(() -> {
-ClusterState state = clientToMasterlessNode.admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
+ClusterState state = clientToClusterManagerlessNode.admin()
+.cluster()
+.prepareState()
+.setLocal(true)
+.execute()
+.actionGet()
+.getState();
 assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID));
 });
 assertRequestBuilderThrows(
-clientToMasterlessNode.prepareGet("test", "1"),
+clientToClusterManagerlessNode.prepareGet("test", "1"),
 ClusterBlockException.class,
 RestStatus.SERVICE_UNAVAILABLE
 );
 assertRequestBuilderThrows(
-clientToMasterlessNode.prepareGet("no_index", "1"),
+clientToClusterManagerlessNode.prepareGet("no_index", "1"),
 ClusterBlockException.class,
 RestStatus.SERVICE_UNAVAILABLE
 );
 assertRequestBuilderThrows(
-clientToMasterlessNode.prepareMultiGet().add("test", "1"),
+clientToClusterManagerlessNode.prepareMultiGet().add("test", "1"),
 ClusterBlockException.class,
 RestStatus.SERVICE_UNAVAILABLE
 );
 assertRequestBuilderThrows(
-clientToMasterlessNode.prepareMultiGet().add("no_index", "1"),
+clientToClusterManagerlessNode.prepareMultiGet().add("no_index", "1"),
 ClusterBlockException.class,
 RestStatus.SERVICE_UNAVAILABLE
 );
 assertRequestBuilderThrows(
-clientToMasterlessNode.admin().indices().prepareAnalyze("test", "this is a test"),
+clientToClusterManagerlessNode.admin().indices().prepareAnalyze("test", "this is a test"),
 ClusterBlockException.class,
 RestStatus.SERVICE_UNAVAILABLE
 );
 assertRequestBuilderThrows(
-clientToMasterlessNode.admin().indices().prepareAnalyze("no_index", "this is a test"),
+clientToClusterManagerlessNode.admin().indices().prepareAnalyze("no_index", "this is a test"),
 ClusterBlockException.class,
 RestStatus.SERVICE_UNAVAILABLE
 );
 assertRequestBuilderThrows(
-clientToMasterlessNode.prepareSearch("test").setSize(0),
+clientToClusterManagerlessNode.prepareSearch("test").setSize(0),
 ClusterBlockException.class,
 RestStatus.SERVICE_UNAVAILABLE
 );
 assertRequestBuilderThrows(
-clientToMasterlessNode.prepareSearch("no_index").setSize(0),
+clientToClusterManagerlessNode.prepareSearch("no_index").setSize(0),
 ClusterBlockException.class,
 RestStatus.SERVICE_UNAVAILABLE
 );
@@ -165,7 +171,7 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 checkUpdateAction(
 false,
 timeout,
-clientToMasterlessNode.prepareUpdate("test", "1")
+clientToClusterManagerlessNode.prepareUpdate("test", "1")
 .setScript(new Script(ScriptType.INLINE, Script.DEFAULT_SCRIPT_LANG, "test script", Collections.emptyMap()))
 .setTimeout(timeout)
 );
@@ -173,41 +179,49 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 checkUpdateAction(
 true,
 timeout,
-clientToMasterlessNode.prepareUpdate("no_index", "1")
+clientToClusterManagerlessNode.prepareUpdate("no_index", "1")
 .setScript(new Script(ScriptType.INLINE, Script.DEFAULT_SCRIPT_LANG, "test script", Collections.emptyMap()))
 .setTimeout(timeout)
 );
 checkWriteAction(
-clientToMasterlessNode.prepareIndex("test")
+clientToClusterManagerlessNode.prepareIndex("test")
 .setId("1")
 .setSource(XContentFactory.jsonBuilder().startObject().endObject())
 .setTimeout(timeout)
 );
 checkWriteAction(
-clientToMasterlessNode.prepareIndex("no_index")
+clientToClusterManagerlessNode.prepareIndex("no_index")
 .setId("1")
 .setSource(XContentFactory.jsonBuilder().startObject().endObject())
 .setTimeout(timeout)
 );
-BulkRequestBuilder bulkRequestBuilder = clientToMasterlessNode.prepareBulk();
+BulkRequestBuilder bulkRequestBuilder = clientToClusterManagerlessNode.prepareBulk();
 bulkRequestBuilder.add(
-clientToMasterlessNode.prepareIndex("test").setId("1").setSource(XContentFactory.jsonBuilder().startObject().endObject())
+clientToClusterManagerlessNode.prepareIndex("test")
+.setId("1")
+.setSource(XContentFactory.jsonBuilder().startObject().endObject())
 );
 bulkRequestBuilder.add(
-clientToMasterlessNode.prepareIndex("test").setId("2").setSource(XContentFactory.jsonBuilder().startObject().endObject())
+clientToClusterManagerlessNode.prepareIndex("test")
+.setId("2")
+.setSource(XContentFactory.jsonBuilder().startObject().endObject())
 );
 bulkRequestBuilder.setTimeout(timeout);
 checkWriteAction(bulkRequestBuilder);
-bulkRequestBuilder = clientToMasterlessNode.prepareBulk();
+bulkRequestBuilder = clientToClusterManagerlessNode.prepareBulk();
 bulkRequestBuilder.add(
-clientToMasterlessNode.prepareIndex("no_index").setId("1").setSource(XContentFactory.jsonBuilder().startObject().endObject())
+clientToClusterManagerlessNode.prepareIndex("no_index")
+.setId("1")
+.setSource(XContentFactory.jsonBuilder().startObject().endObject())
 );
 bulkRequestBuilder.add(
-clientToMasterlessNode.prepareIndex("no_index").setId("2").setSource(XContentFactory.jsonBuilder().startObject().endObject())
+clientToClusterManagerlessNode.prepareIndex("no_index")
+.setId("2")
+.setSource(XContentFactory.jsonBuilder().startObject().endObject())
 );
 bulkRequestBuilder.setTimeout(timeout);
 checkWriteAction(bulkRequestBuilder);
@@ -216,7 +230,7 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 }
 void checkUpdateAction(boolean autoCreateIndex, TimeValue timeout, ActionRequestBuilder<?, ?> builder) {
-// we clean the metadata when loosing a master, therefore all operations on indices will auto create it, if allowed
+// we clean the metadata when loosing a cluster-manager, therefore all operations on indices will auto create it, if allowed
 try {
 builder.get();
 fail("expected ClusterBlockException or MasterNotDiscoveredException");
@@ -239,7 +253,7 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 }
 }
-public void testNoMasterActionsWriteMasterBlock() throws Exception {
+public void testNoClusterManagerActionsWriteClusterManagerBlock() throws Exception {
 Settings settings = Settings.builder()
 .put(AutoCreateIndex.AUTO_CREATE_INDEX_SETTING.getKey(), false)
 .put(NoMasterBlockService.NO_CLUSTER_MANAGER_BLOCK_SETTING.getKey(), "write")
@@ -270,31 +284,34 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 internalCluster().setDisruptionScheme(disruptionScheme);
 disruptionScheme.startDisrupting();
-final Client clientToMasterlessNode = client();
+final Client clientToClusterManagerlessNode = client();
 assertBusy(() -> {
-ClusterState state = clientToMasterlessNode.admin().cluster().prepareState().setLocal(true).get().getState();
+ClusterState state = clientToClusterManagerlessNode.admin().cluster().prepareState().setLocal(true).get().getState();
 assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID));
 });
-GetResponse getResponse = clientToMasterlessNode.prepareGet("test1", "1").get();
+GetResponse getResponse = clientToClusterManagerlessNode.prepareGet("test1", "1").get();
 assertExists(getResponse);
-SearchResponse countResponse = clientToMasterlessNode.prepareSearch("test1").setAllowPartialSearchResults(true).setSize(0).get();
+SearchResponse countResponse = clientToClusterManagerlessNode.prepareSearch("test1")
+.setAllowPartialSearchResults(true)
+.setSize(0)
+.get();
 assertHitCount(countResponse, 1L);
 logger.info("--> here 3");
-SearchResponse searchResponse = clientToMasterlessNode.prepareSearch("test1").setAllowPartialSearchResults(true).get();
+SearchResponse searchResponse = clientToClusterManagerlessNode.prepareSearch("test1").setAllowPartialSearchResults(true).get();
 assertHitCount(searchResponse, 1L);
-countResponse = clientToMasterlessNode.prepareSearch("test2").setAllowPartialSearchResults(true).setSize(0).get();
+countResponse = clientToClusterManagerlessNode.prepareSearch("test2").setAllowPartialSearchResults(true).setSize(0).get();
 assertThat(countResponse.getTotalShards(), equalTo(3));
 assertThat(countResponse.getSuccessfulShards(), equalTo(1));
 TimeValue timeout = TimeValue.timeValueMillis(200);
 long now = System.currentTimeMillis();
 try {
-clientToMasterlessNode.prepareUpdate("test1", "1")
+clientToClusterManagerlessNode.prepareUpdate("test1", "1")
 .setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2")
 .setTimeout(timeout)
 .get();
@@ -308,7 +325,7 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 }
 try {
-clientToMasterlessNode.prepareIndex("test1")
+clientToClusterManagerlessNode.prepareIndex("test1")
 .setId("1")
 .setSource(XContentFactory.jsonBuilder().startObject().endObject())
 .setTimeout(timeout)
@@ -321,7 +338,7 @@ public class NoMasterNodeIT extends OpenSearchIntegTestCase {
 internalCluster().clearDisruptionScheme(true);
 }
-public void testNoMasterActionsMetadataWriteMasterBlock() throws Exception {
+public void testNoClusterManagerActionsMetadataWriteClusterManagerBlock() throws Exception {
 Settings settings = Settings.builder()
 .put(NoMasterBlockService.NO_CLUSTER_MANAGER_BLOCK_SETTING.getKey(), "metadata_write")
 .put(MappingUpdatedAction.INDICES_MAPPING_DYNAMIC_TIMEOUT_SETTING.getKey(), "100ms")
@@ -286,7 +286,12 @@ public class SimpleClusterStateIT extends OpenSearchIntegTestCase {
 .get()
 );
 ensureGreen(); // wait for green state, so its both green, and there are no more pending events
-MappingMetadata masterMappingMetadata = client().admin().indices().prepareGetMappings("test").get().getMappings().get("test");
+MappingMetadata clusterManagerMappingMetadata = client().admin()
+.indices()
+.prepareGetMappings("test")
+.get()
+.getMappings()
+.get("test");
 for (Client client : clients()) {
 MappingMetadata mappingMetadata = client.admin()
 .indices()
@@ -295,8 +300,8 @@ public class SimpleClusterStateIT extends OpenSearchIntegTestCase {
 .get()
 .getMappings()
 .get("test");
-assertThat(mappingMetadata.source().string(), equalTo(masterMappingMetadata.source().string()));
-assertThat(mappingMetadata, equalTo(masterMappingMetadata));
+assertThat(mappingMetadata.source().string(), equalTo(clusterManagerMappingMetadata.source().string()));
+assertThat(mappingMetadata, equalTo(clusterManagerMappingMetadata));
 }
 }
@@ -53,11 +53,11 @@ import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.nullValue;
 @ClusterScope(scope = Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
-public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
+public class SpecificClusterManagerNodesIT extends OpenSearchIntegTestCase {
-public void testSimpleOnlyMasterNodeElection() throws IOException {
+public void testSimpleOnlyClusterManagerNodeElection() throws IOException {
 internalCluster().setBootstrapClusterManagerNodeIndex(0);
-logger.info("--> start data node / non master node");
+logger.info("--> start data node / non cluster-manager node");
 internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
 try {
 assertThat(
@@ -72,12 +72,12 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .getMasterNodeId(),
 nullValue()
 );
-fail("should not be able to find master");
+fail("should not be able to find cluster-manager");
 } catch (MasterNotDiscoveredException e) {
 // all is well, no cluster-manager elected
 }
-logger.info("--> start master node");
-final String masterNodeName = internalCluster().startClusterManagerOnlyNode();
+logger.info("--> start cluster-manager node");
+final String clusterManagerNodeName = internalCluster().startClusterManagerOnlyNode();
 assertThat(
 internalCluster().nonMasterClient()
 .admin()
@@ -89,7 +89,7 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(masterNodeName)
+equalTo(clusterManagerNodeName)
 );
 assertThat(
 internalCluster().masterClient()
@@ -102,11 +102,11 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(masterNodeName)
+equalTo(clusterManagerNodeName)
 );
-logger.info("--> stop master node");
-Settings masterDataPathSettings = internalCluster().dataPathSettings(internalCluster().getMasterName());
+logger.info("--> stop cluster-manager node");
+Settings clusterManagerDataPathSettings = internalCluster().dataPathSettings(internalCluster().getMasterName());
 internalCluster().stopCurrentMasterNode();
 try {
@@ -122,14 +122,14 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .getMasterNodeId(),
 nullValue()
 );
-fail("should not be able to find master");
+fail("should not be able to find cluster-manager");
 } catch (MasterNotDiscoveredException e) {
 // all is well, no cluster-manager elected
 }
-logger.info("--> start previous master node again");
-final String nextMasterEligibleNodeName = internalCluster().startNode(
-Settings.builder().put(nonDataNode(masterNode())).put(masterDataPathSettings)
+logger.info("--> start previous cluster-manager node again");
+final String nextClusterManagerEligibleNodeName = internalCluster().startNode(
+Settings.builder().put(nonDataNode(masterNode())).put(clusterManagerDataPathSettings)
 );
 assertThat(
 internalCluster().nonMasterClient()
@@ -142,7 +142,7 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(nextMasterEligibleNodeName)
+equalTo(nextClusterManagerEligibleNodeName)
 );
 assertThat(
 internalCluster().masterClient()
@@ -155,13 +155,13 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(nextMasterEligibleNodeName)
+equalTo(nextClusterManagerEligibleNodeName)
 );
 }
-public void testElectOnlyBetweenMasterNodes() throws Exception {
+public void testElectOnlyBetweenClusterManagerNodes() throws Exception {
 internalCluster().setBootstrapClusterManagerNodeIndex(0);
-logger.info("--> start data node / non master node");
+logger.info("--> start data node / non cluster-manager node");
 internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
 try {
 assertThat(
@@ -176,12 +176,12 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .getMasterNodeId(),
 nullValue()
 );
-fail("should not be able to find master");
+fail("should not be able to find cluster-manager");
 } catch (MasterNotDiscoveredException e) {
 // all is well, no cluster-manager elected
 }
-logger.info("--> start master node (1)");
-final String masterNodeName = internalCluster().startClusterManagerOnlyNode();
+logger.info("--> start cluster-manager node (1)");
+final String clusterManagerNodeName = internalCluster().startClusterManagerOnlyNode();
 assertThat(
 internalCluster().nonMasterClient()
 .admin()
@@ -193,7 +193,7 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(masterNodeName)
+equalTo(clusterManagerNodeName)
 );
 assertThat(
 internalCluster().masterClient()
@@ -206,11 +206,11 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(masterNodeName)
+equalTo(clusterManagerNodeName)
 );
-logger.info("--> start master node (2)");
-final String nextMasterEligableNodeName = internalCluster().startClusterManagerOnlyNode();
+logger.info("--> start cluster-manager node (2)");
+final String nextClusterManagerEligableNodeName = internalCluster().startClusterManagerOnlyNode();
 assertThat(
 internalCluster().nonMasterClient()
 .admin()
@@ -222,7 +222,7 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(masterNodeName)
+equalTo(clusterManagerNodeName)
 );
 assertThat(
 internalCluster().nonMasterClient()
@@ -235,7 +235,7 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(masterNodeName)
+equalTo(clusterManagerNodeName)
 );
 assertThat(
 internalCluster().masterClient()
@@ -248,12 +248,12 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(masterNodeName)
+equalTo(clusterManagerNodeName)
 );
-logger.info("--> closing master node (1)");
-client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(masterNodeName)).get();
-// removing the master from the voting configuration immediately triggers the master to step down
+logger.info("--> closing cluster-manager node (1)");
+client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(clusterManagerNodeName)).get();
+// removing the cluster-manager from the voting configuration immediately triggers the cluster-manager to step down
 assertBusy(() -> {
 assertThat(
 internalCluster().nonMasterClient()
@@ -266,7 +266,7 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(nextMasterEligableNodeName)
+equalTo(nextClusterManagerEligableNodeName)
 );
 assertThat(
 internalCluster().masterClient()
@@ -279,10 +279,10 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(nextMasterEligableNodeName)
+equalTo(nextClusterManagerEligableNodeName)
 );
 });
-internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNodeName));
+internalCluster().stopRandomNode(InternalTestCluster.nameFilter(clusterManagerNodeName));
 assertThat(
 internalCluster().nonMasterClient()
 .admin()
@@ -294,7 +294,7 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(nextMasterEligableNodeName)
+equalTo(nextClusterManagerEligableNodeName)
 );
 assertThat(
 internalCluster().masterClient()
@@ -307,16 +307,16 @@ public class SpecificMasterNodesIT extends OpenSearchIntegTestCase {
 .nodes()
 .getMasterNode()
 .getName(),
-equalTo(nextMasterEligableNodeName)
+equalTo(nextClusterManagerEligableNodeName)
 );
 }
 public void testAliasFilterValidation() {
 internalCluster().setBootstrapClusterManagerNodeIndex(0);
-logger.info("--> start master node / non data");
+logger.info("--> start cluster-manager node / non data");
 internalCluster().startClusterManagerOnlyNode();
-logger.info("--> start data node / non master node");
+logger.info("--> start data node / non cluster-manager node");
 internalCluster().startDataOnlyNode();
 assertAcked(
@@ -119,10 +119,13 @@ public class ShardStateActionIT extends OpenSearchIntegTestCase {
 .setPersistentSettings(Settings.builder().put(ShardStateAction.FOLLOW_UP_REROUTE_PRIORITY_SETTING.getKey(), "urgent"))
 );
-// ensure that the master always has a HIGH priority pending task
-final AtomicBoolean stopSpammingMaster = new AtomicBoolean();
-final ClusterService masterClusterService = internalCluster().getInstance(ClusterService.class, internalCluster().getMasterName());
-masterClusterService.submitStateUpdateTask("spam", new ClusterStateUpdateTask(Priority.HIGH) {
+// ensure that the cluster-manager always has a HIGH priority pending task
+final AtomicBoolean stopSpammingClusterManager = new AtomicBoolean();
+final ClusterService clusterManagerClusterService = internalCluster().getInstance(
+ClusterService.class,
+internalCluster().getMasterName()
+);
+clusterManagerClusterService.submitStateUpdateTask("spam", new ClusterStateUpdateTask(Priority.HIGH) {
 @Override
 public ClusterState execute(ClusterState currentState) {
 return currentState;
@@ -135,18 +138,18 @@ public class ShardStateActionIT extends OpenSearchIntegTestCase {
 @Override
 public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
-if (stopSpammingMaster.get() == false) {
-masterClusterService.submitStateUpdateTask("spam", this);
+if (stopSpammingClusterManager.get() == false) {
+clusterManagerClusterService.submitStateUpdateTask("spam", this);
 }
 }
 });
-// even with the master under such pressure, all shards of the index can be assigned; in particular, after the primaries have
-// started there's a follow-up reroute at a higher priority than the spam
+// even with the cluster-manager under such pressure, all shards of the index can be assigned;
+// in particular, after the primaries have started there's a follow-up reroute at a higher priority than the spam
 createIndex("test");
 assertFalse(client().admin().cluster().prepareHealth().setWaitForGreenStatus().get().isTimedOut());
-stopSpammingMaster.set(true);
+stopSpammingClusterManager.set(true);
 assertFalse(client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get().isTimedOut());
 assertAcked(
@@ -105,9 +105,9 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 // close to have some unassigned started shards shards..
 client().admin().indices().prepareClose(index).get();
-final String masterName = internalCluster().getMasterName();
-final ClusterService clusterService = internalCluster().clusterService(masterName);
-final AllocationService allocationService = internalCluster().getInstance(AllocationService.class, masterName);
+final String clusterManagerName = internalCluster().getMasterName();
+final ClusterService clusterService = internalCluster().clusterService(clusterManagerName);
+final AllocationService allocationService = internalCluster().getInstance(AllocationService.class, clusterManagerName);
 clusterService.submitStateUpdateTask("test-inject-node-and-reroute", new ClusterStateUpdateTask() {
 @Override
 public ClusterState execute(ClusterState currentState) {
@@ -159,16 +159,16 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 ) throws Exception {
 // Wait for no publication in progress to not accidentally cancel a publication different from the one triggered by the given
 // request.
-final Coordinator masterCoordinator = (Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class);
+final Coordinator clusterManagerCoordinator = (Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class);
 assertBusy(() -> {
-assertFalse(masterCoordinator.publicationInProgress());
-final long applierVersion = masterCoordinator.getApplierState().version();
+assertFalse(clusterManagerCoordinator.publicationInProgress());
+final long applierVersion = clusterManagerCoordinator.getApplierState().version();
 for (Discovery instance : internalCluster().getInstances(Discovery.class)) {
 assertEquals(((Coordinator) instance).getApplierState().version(), applierVersion);
 }
 });
 ActionFuture<Res> future = req.execute();
-assertBusy(() -> assertTrue(masterCoordinator.cancelCommittedPublication()));
+assertBusy(() -> assertTrue(clusterManagerCoordinator.cancelCommittedPublication()));
 return future;
 }
@@ -179,7 +179,7 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 prepareCreate("test").setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)).get();
 ensureGreen("test");
-// block none master node.
+// block none cluster-manager node.
 BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(dataNode, random());
 internalCluster().setDisruptionScheme(disruption);
 logger.info("--> indexing a doc");
@@ -202,9 +202,9 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 ensureGreen(TimeValue.timeValueMinutes(30), "test");
 // due to publish_timeout of 0, wait for data node to have cluster state fully applied
 assertBusy(() -> {
-long masterClusterStateVersion = internalCluster().clusterService(internalCluster().getMasterName()).state().version();
+long clusterManagerClusterStateVersion = internalCluster().clusterService(internalCluster().getMasterName()).state().version();
 long dataClusterStateVersion = internalCluster().clusterService(dataNode).state().version();
-assertThat(masterClusterStateVersion, equalTo(dataClusterStateVersion));
+assertThat(clusterManagerClusterStateVersion, equalTo(dataClusterStateVersion));
 });
 assertHitCount(client().prepareSearch("test").get(), 0);
 }
@@ -212,7 +212,7 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 public void testDelayedMappingPropagationOnPrimary() throws Exception {
 // Here we want to test that things go well if there is a first request
 // that adds mappings but before mappings are propagated to all nodes
-// another index request introduces the same mapping. The master node
+// another index request introduces the same mapping. The cluster-manager node
 // will reply immediately since it did not change the cluster state
 // but the change might not be on the node that performed the indexing
 // operation yet
@@ -220,36 +220,36 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 final List<String> nodeNames = internalCluster().startNodes(2);
 assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
-final String master = internalCluster().getMasterName();
-assertThat(nodeNames, hasItem(master));
+final String clusterManager = internalCluster().getMasterName();
+assertThat(nodeNames, hasItem(clusterManager));
 String otherNode = null;
 for (String node : nodeNames) {
-if (node.equals(master) == false) {
+if (node.equals(clusterManager) == false) {
 otherNode = node;
 break;
 }
 }
 assertNotNull(otherNode);
-// Don't allocate the shard on the master node
+// Don't allocate the shard on the cluster-manager node
 assertAcked(
 prepareCreate("index").setSettings(
 Settings.builder()
 .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
 .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
-.put("index.routing.allocation.exclude._name", master)
+.put("index.routing.allocation.exclude._name", clusterManager)
 ).get()
 );
 ensureGreen();
 // Check routing tables
 ClusterState state = client().admin().cluster().prepareState().get().getState();
-assertEquals(master, state.nodes().getMasterNode().getName());
+assertEquals(clusterManager, state.nodes().getMasterNode().getName());
 List<ShardRouting> shards = state.routingTable().allShards("index");
 assertThat(shards, hasSize(1));
 for (ShardRouting shard : shards) {
 if (shard.primary()) {
-// primary must not be on the master node
+// primary must not be on the cluster-manager node
 assertFalse(state.nodes().getMasterNodeId().equals(shard.currentNodeId()));
 } else {
 fail(); // only primaries
@@ -266,7 +266,7 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 client().admin().indices().preparePutMapping("index").setSource("field", "type=long")
 );
-// ...and wait for mappings to be available on master
+// ...and wait for mappings to be available on cluster-manager
 assertBusy(() -> {
 MappingMetadata typeMappings = client().admin().indices().prepareGetMappings("index").get().getMappings().get("index");
 assertNotNull(typeMappings);
@@ -308,24 +308,24 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 final List<String> nodeNames = internalCluster().startNodes(2);
 assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
-final String master = internalCluster().getMasterName();
-assertThat(nodeNames, hasItem(master));
+final String clusterManager = internalCluster().getMasterName();
+assertThat(nodeNames, hasItem(clusterManager));
 String otherNode = null;
 for (String node : nodeNames) {
-if (node.equals(master) == false) {
+if (node.equals(clusterManager) == false) {
 otherNode = node;
 break;
 }
 }
 assertNotNull(otherNode);
-// Force allocation of the primary on the master node by first only allocating on the master
+// Force allocation of the primary on the cluster-manager node by first only allocating on the cluster-manager
 // and then allowing all nodes so that the replica gets allocated on the other node
 prepareCreate("index").setSettings(
 Settings.builder()
 .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
 .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
-.put("index.routing.allocation.include._name", master)
+.put("index.routing.allocation.include._name", clusterManager)
 ).get();
 client().admin()
 .indices()
@@ -336,12 +336,12 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 // Check routing tables
 ClusterState state = client().admin().cluster().prepareState().get().getState();
-assertEquals(master, state.nodes().getMasterNode().getName());
+assertEquals(clusterManager, state.nodes().getMasterNode().getName());
 List<ShardRouting> shards = state.routingTable().allShards("index");
 assertThat(shards, hasSize(2));
 for (ShardRouting shard : shards) {
 if (shard.primary()) {
-// primary must be on the master
+// primary must be on the cluster-manager
 assertEquals(state.nodes().getMasterNodeId(), shard.currentNodeId());
 } else {
 assertTrue(shard.active());
@@ -357,9 +357,9 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 );
 final Index index = resolveIndex("index");
-// Wait for mappings to be available on master
+// Wait for mappings to be available on cluster-manager
 assertBusy(() -> {
-final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
+final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, clusterManager);
 final IndexService indexService = indicesService.indexServiceSafe(index);
 assertNotNull(indexService);
 final MapperService mapperService = indexService.mapperService();
@@ -381,9 +381,9 @@ public class RareClusterStateIT extends OpenSearchIntegTestCase {
 client().prepareIndex("index").setId("2").setSource("field2", 42)
 );
-// ...and wait for second mapping to be available on master
+// ...and wait for second mapping to be available on cluster-manager
 assertBusy(() -> {
-final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
+final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, clusterManager);
 final IndexService indexService = indicesService.indexServiceSafe(index);
 assertNotNull(indexService);
 final MapperService mapperService = indexService.mapperService();
@@ -167,7 +167,7 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
 }
 }
-public void testBootstrapNotMasterEligible() {
+public void testBootstrapNotClusterManagerEligible() {
 final Environment environment = TestEnvironment.newEnvironment(
 Settings.builder().put(nonMasterNode(internalCluster().getDefaultSettings())).build()
 );
@@ -283,12 +283,12 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
 expectThrows(() -> detachCluster(environment, true), OpenSearchNodeCommand.ABORTED_BY_USER_MSG);
 }
-public void test3MasterNodes2Failed() throws Exception {
+public void test3ClusterManagerNodes2Failed() throws Exception {
 internalCluster().setBootstrapClusterManagerNodeIndex(2);
-List<String> masterNodes = new ArrayList<>();
+List<String> clusterManagerNodes = new ArrayList<>();
 logger.info("--> start 1st cluster-manager-eligible node");
-masterNodes.add(
+clusterManagerNodes.add(
 internalCluster().startClusterManagerOnlyNode(
 Settings.builder().put(DiscoverySettings.INITIAL_STATE_TIMEOUT_SETTING.getKey(), "0s").build()
 )
@@ -300,12 +300,12 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
 ); // node ordinal 1
 logger.info("--> start 2nd and 3rd cluster-manager-eligible nodes and bootstrap");
-masterNodes.addAll(internalCluster().startMasterOnlyNodes(2)); // node ordinals 2 and 3
+clusterManagerNodes.addAll(internalCluster().startMasterOnlyNodes(2)); // node ordinals 2 and 3
 logger.info("--> wait for all nodes to join the cluster");
 ensureStableCluster(4);
-List<String> currentClusterNodes = new ArrayList<>(masterNodes);
+List<String> currentClusterNodes = new ArrayList<>(clusterManagerNodes);
 currentClusterNodes.add(dataNode);
 currentClusterNodes.forEach(node -> ensureReadOnlyBlock(false, node));
@ -313,14 +313,14 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
|
|||
createIndex("test");
|
||||
ensureGreen("test");
|
||||
|
||||
Settings master1DataPathSettings = internalCluster().dataPathSettings(masterNodes.get(0));
|
||||
Settings master2DataPathSettings = internalCluster().dataPathSettings(masterNodes.get(1));
|
||||
Settings master3DataPathSettings = internalCluster().dataPathSettings(masterNodes.get(2));
|
||||
Settings clusterManager1DataPathSettings = internalCluster().dataPathSettings(clusterManagerNodes.get(0));
|
||||
Settings clusterManager2DataPathSettings = internalCluster().dataPathSettings(clusterManagerNodes.get(1));
|
||||
Settings clusterManager3DataPathSettings = internalCluster().dataPathSettings(clusterManagerNodes.get(2));
|
||||
Settings dataNodeDataPathSettings = internalCluster().dataPathSettings(dataNode);
|
||||
|
||||
logger.info("--> stop 2nd and 3d master eligible node");
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNodes.get(1)));
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNodes.get(2)));
|
||||
logger.info("--> stop 2nd and 3d cluster-manager eligible node");
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(clusterManagerNodes.get(1)));
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(clusterManagerNodes.get(2)));
|
||||
|
||||
logger.info("--> ensure NO_MASTER_BLOCK on data-only node");
|
||||
assertBusy(() -> {
|
||||
|
@ -336,19 +336,19 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
|
|||
});
|
||||
|
||||
logger.info("--> try to unsafely bootstrap 1st cluster-manager-eligible node, while node lock is held");
|
||||
Environment environmentMaster1 = TestEnvironment.newEnvironment(
|
||||
Settings.builder().put(internalCluster().getDefaultSettings()).put(master1DataPathSettings).build()
|
||||
Environment environmentClusterManager1 = TestEnvironment.newEnvironment(
|
||||
Settings.builder().put(internalCluster().getDefaultSettings()).put(clusterManager1DataPathSettings).build()
|
||||
);
|
||||
expectThrows(() -> unsafeBootstrap(environmentMaster1), UnsafeBootstrapMasterCommand.FAILED_TO_OBTAIN_NODE_LOCK_MSG);
|
||||
expectThrows(() -> unsafeBootstrap(environmentClusterManager1), UnsafeBootstrapMasterCommand.FAILED_TO_OBTAIN_NODE_LOCK_MSG);
|
||||
|
||||
logger.info("--> stop 1st cluster-manager-eligible node and data-only node");
|
||||
NodeEnvironment nodeEnvironment = internalCluster().getMasterNodeInstance(NodeEnvironment.class);
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNodes.get(0)));
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(clusterManagerNodes.get(0)));
|
||||
assertBusy(() -> internalCluster().getInstance(GatewayMetaState.class, dataNode).allPendingAsyncStatesWritten());
|
||||
internalCluster().stopRandomDataNode();
|
||||
|
||||
logger.info("--> unsafely-bootstrap 1st cluster-manager-eligible node");
|
||||
MockTerminal terminal = unsafeBootstrap(environmentMaster1, false, true);
|
||||
MockTerminal terminal = unsafeBootstrap(environmentClusterManager1, false, true);
|
||||
Metadata metadata = OpenSearchNodeCommand.createPersistedClusterStateService(Settings.EMPTY, nodeEnvironment.nodeDataPaths())
|
||||
.loadBestOnDiskState().metadata;
|
||||
assertThat(
|
||||
|
@ -364,7 +364,7 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
|
|||
);
|
||||
|
||||
logger.info("--> start 1st cluster-manager-eligible node");
|
||||
String masterNode2 = internalCluster().startClusterManagerOnlyNode(master1DataPathSettings);
|
||||
String clusterManagerNode2 = internalCluster().startClusterManagerOnlyNode(clusterManager1DataPathSettings);
|
||||
|
||||
logger.info("--> detach-cluster on data-only node");
|
||||
Environment environmentData = TestEnvironment.newEnvironment(
|
||||
|
@ -391,7 +391,7 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
|
|||
|
||||
List<String> bootstrappedNodes = new ArrayList<>();
|
||||
bootstrappedNodes.add(dataNode2);
|
||||
bootstrappedNodes.add(masterNode2);
|
||||
bootstrappedNodes.add(clusterManagerNode2);
|
||||
bootstrappedNodes.forEach(node -> ensureReadOnlyBlock(true, node));
|
||||
|
||||
logger.info("--> ensure index test is green");
|
||||
|
@ -400,30 +400,30 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
|
|||
assertThat(indexMetadata.getSettings().get(IndexMetadata.SETTING_HISTORY_UUID), notNullValue());
|
||||
|
||||
logger.info("--> detach-cluster on 2nd and 3rd cluster-manager-eligible nodes");
|
||||
Environment environmentMaster2 = TestEnvironment.newEnvironment(
|
||||
Settings.builder().put(internalCluster().getDefaultSettings()).put(master2DataPathSettings).build()
|
||||
Environment environmentClusterManager2 = TestEnvironment.newEnvironment(
|
||||
Settings.builder().put(internalCluster().getDefaultSettings()).put(clusterManager2DataPathSettings).build()
|
||||
);
|
||||
detachCluster(environmentMaster2, false);
|
||||
Environment environmentMaster3 = TestEnvironment.newEnvironment(
|
||||
Settings.builder().put(internalCluster().getDefaultSettings()).put(master3DataPathSettings).build()
|
||||
detachCluster(environmentClusterManager2, false);
|
||||
Environment environmentClusterManager3 = TestEnvironment.newEnvironment(
|
||||
Settings.builder().put(internalCluster().getDefaultSettings()).put(clusterManager3DataPathSettings).build()
|
||||
);
|
||||
detachCluster(environmentMaster3, false);
|
||||
detachCluster(environmentClusterManager3, false);
|
||||
|
||||
logger.info("--> start 2nd and 3rd cluster-manager-eligible nodes and ensure 4 nodes stable cluster");
|
||||
bootstrappedNodes.add(internalCluster().startClusterManagerOnlyNode(master2DataPathSettings));
|
||||
bootstrappedNodes.add(internalCluster().startClusterManagerOnlyNode(master3DataPathSettings));
|
||||
bootstrappedNodes.add(internalCluster().startClusterManagerOnlyNode(clusterManager2DataPathSettings));
|
||||
bootstrappedNodes.add(internalCluster().startClusterManagerOnlyNode(clusterManager3DataPathSettings));
|
||||
ensureStableCluster(4);
|
||||
bootstrappedNodes.forEach(node -> ensureReadOnlyBlock(true, node));
|
||||
removeBlock();
|
||||
}
|
||||
|
||||
public void testAllMasterEligibleNodesFailedDanglingIndexImport() throws Exception {
|
||||
public void testAllClusterManagerEligibleNodesFailedDanglingIndexImport() throws Exception {
|
||||
internalCluster().setBootstrapClusterManagerNodeIndex(0);
|
||||
|
||||
Settings settings = Settings.builder().put(AUTO_IMPORT_DANGLING_INDICES_SETTING.getKey(), true).build();
|
||||
|
||||
logger.info("--> start mixed data and cluster-manager-eligible node and bootstrap cluster");
|
||||
String masterNode = internalCluster().startNode(settings); // node ordinal 0
|
||||
String clusterManagerNode = internalCluster().startNode(settings); // node ordinal 0
|
||||
|
||||
logger.info("--> start data-only node and ensure 2 nodes stable cluster");
|
||||
String dataNode = internalCluster().startDataOnlyNode(settings); // node ordinal 1
|
||||
|
@ -458,7 +458,7 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
|
|||
detachCluster(environment, false);
|
||||
|
||||
logger.info("--> stop cluster-manager-eligible node, clear its data and start it again - new cluster should form");
|
||||
internalCluster().restartNode(masterNode, new InternalTestCluster.RestartCallback() {
|
||||
internalCluster().restartNode(clusterManagerNode, new InternalTestCluster.RestartCallback() {
|
||||
@Override
|
||||
public boolean clearData(String nodeName) {
|
||||
return true;
|
||||
|
@ -490,7 +490,7 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
|
|||
|
||||
String node = internalCluster().startClusterManagerOnlyNode(
|
||||
Settings.builder()
|
||||
// give the cluster 2 seconds to elect the master (it should not)
|
||||
// give the cluster 2 seconds to elect the cluster-manager (it should not)
|
||||
.put(DiscoverySettings.INITIAL_STATE_TIMEOUT_SETTING.getKey(), "2s")
|
||||
.put(clusterManagerNodeDataPathSettings)
|
||||
.build()
|
||||
|
@ -524,9 +524,9 @@ public class UnsafeBootstrapAndDetachCommandIT extends OpenSearchIntegTestCase {
|
|||
detachCluster(environment);
|
||||
unsafeBootstrap(environment); // read-only block will remain same as one before bootstrap, in this case it is false
|
||||
|
||||
String masterNode2 = internalCluster().startClusterManagerOnlyNode(clusterManagerNodeDataPathSettings);
|
||||
String clusterManagerNode2 = internalCluster().startClusterManagerOnlyNode(clusterManagerNodeDataPathSettings);
|
||||
ensureGreen();
|
||||
ensureReadOnlyBlock(false, masterNode2);
|
||||
ensureReadOnlyBlock(false, clusterManagerNode2);
|
||||
|
||||
state = internalCluster().client().admin().cluster().prepareState().execute().actionGet().getState();
|
||||
assertThat(state.metadata().settings().get(INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING.getKey()), equalTo("1234kb"));
|
||||
|
|
|
@@ -64,12 +64,12 @@ public class VotingConfigurationIT extends OpenSearchIntegTestCase {
public void testAbdicateAfterVotingConfigExclusionAdded() throws ExecutionException, InterruptedException {
internalCluster().setBootstrapClusterManagerNodeIndex(0);
internalCluster().startNodes(2);
final String originalMaster = internalCluster().getMasterName();
final String originalClusterManager = internalCluster().getMasterName();
logger.info("--> excluding master node {}", originalMaster);
client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(originalMaster)).get();
logger.info("--> excluding cluster-manager node {}", originalClusterManager);
client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(originalClusterManager)).get();
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();
assertNotEquals(originalMaster, internalCluster().getMasterName());
assertNotEquals(originalClusterManager, internalCluster().getMasterName());
}
public void testElectsNodeNotInVotingConfiguration() throws Exception {
@@ -77,7 +77,8 @@ public class VotingConfigurationIT extends OpenSearchIntegTestCase {
final List<String> nodeNames = internalCluster().startNodes(4);
// a 4-node cluster settles on a 3-node configuration; we then prevent the nodes in the configuration from winning an election
// by failing at the pre-voting stage, so that the extra node must be elected instead when the master shuts down. This extra node
// by failing at the pre-voting stage, so that the extra node must be elected instead when the cluster-manager shuts down. This
// extra node
// should then add itself into the voting configuration.
assertFalse(

@@ -71,10 +71,10 @@ import static org.hamcrest.Matchers.notNullValue;
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0, numClientNodes = 0)
public class ZenDiscoveryIT extends OpenSearchIntegTestCase {
public void testNoShardRelocationsOccurWhenElectedMasterNodeFails() throws Exception {
public void testNoShardRelocationsOccurWhenElectedClusterManagerNodeFails() throws Exception {
Settings masterNodeSettings = clusterManagerOnlyNode();
internalCluster().startNodes(2, masterNodeSettings);
Settings clusterManagerNodeSettings = clusterManagerOnlyNode();
internalCluster().startNodes(2, clusterManagerNodeSettings);
Settings dateNodeSettings = dataNode();
internalCluster().startNodes(2, dateNodeSettings);
ClusterHealthResponse clusterHealthResponse = client().admin()
@@ -89,20 +89,20 @@ public class ZenDiscoveryIT extends OpenSearchIntegTestCase {
createIndex("test");
ensureSearchable("test");
RecoveryResponse r = client().admin().indices().prepareRecoveries("test").get();
int numRecoveriesBeforeNewMaster = r.shardRecoveryStates().get("test").size();
int numRecoveriesBeforeNewClusterManager = r.shardRecoveryStates().get("test").size();
final String oldMaster = internalCluster().getMasterName();
final String oldClusterManager = internalCluster().getMasterName();
internalCluster().stopCurrentMasterNode();
assertBusy(() -> {
String current = internalCluster().getMasterName();
assertThat(current, notNullValue());
assertThat(current, not(equalTo(oldMaster)));
assertThat(current, not(equalTo(oldClusterManager)));
});
ensureSearchable("test");
r = client().admin().indices().prepareRecoveries("test").get();
int numRecoveriesAfterNewMaster = r.shardRecoveryStates().get("test").size();
assertThat(numRecoveriesAfterNewMaster, equalTo(numRecoveriesBeforeNewMaster));
int numRecoveriesAfterNewClusterManager = r.shardRecoveryStates().get("test").size();
assertThat(numRecoveriesAfterNewClusterManager, equalTo(numRecoveriesBeforeNewClusterManager));
}
public void testHandleNodeJoin_incompatibleClusterState() throws InterruptedException, ExecutionException, TimeoutException {

@@ -135,7 +135,7 @@ public class AllocationIdIT extends OpenSearchIntegTestCase {
// create fake corrupted marker on node1
putFakeCorruptionMarker(indexSettings, shardId, indexPath);
// thanks to master node1 is out of sync
// thanks to cluster-manager node1 is out of sync
node1 = internalCluster().startNode(node1DataPathSettings);
// there is only _stale_ primary

@@ -110,7 +110,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
}
public void testBulkWeirdScenario() throws Exception {
String master = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
String clusterManager = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
internalCluster().startDataOnlyNodes(2);
assertAcked(
@@ -149,7 +149,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
}
// returns data paths settings of in-sync shard copy
private Settings createStaleReplicaScenario(String master) throws Exception {
private Settings createStaleReplicaScenario(String clusterManager) throws Exception {
client().prepareIndex("test").setSource(jsonBuilder().startObject().field("field", "value1").endObject()).get();
refresh();
ClusterState state = client().admin().cluster().prepareState().all().get().getState();
@@ -167,14 +167,14 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
}
NetworkDisruption partition = new NetworkDisruption(
new TwoPartitions(Sets.newHashSet(master, replicaNode), Collections.singleton(primaryNode)),
new TwoPartitions(Sets.newHashSet(clusterManager, replicaNode), Collections.singleton(primaryNode)),
NetworkDisruption.DISCONNECT
);
internalCluster().setDisruptionScheme(partition);
logger.info("--> partitioning node with primary shard from rest of cluster");
partition.startDisrupting();
ensureStableCluster(2, master);
ensureStableCluster(2, clusterManager);
logger.info("--> index a document into previous replica shard (that is now primary)");
client(replicaNode).prepareIndex("test").setSource(jsonBuilder().startObject().field("field", "value1").endObject()).get();
@@ -183,27 +183,30 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
final Settings inSyncDataPathSettings = internalCluster().dataPathSettings(replicaNode);
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(replicaNode));
ensureStableCluster(1, master);
ensureStableCluster(1, clusterManager);
partition.stopDisrupting();
logger.info("--> waiting for node with old primary shard to rejoin the cluster");
ensureStableCluster(2, master);
ensureStableCluster(2, clusterManager);
logger.info("--> check that old primary shard does not get promoted to primary again");
// kick reroute and wait for all shard states to be fetched
client(master).admin().cluster().prepareReroute().get();
client(clusterManager).admin().cluster().prepareReroute().get();
assertBusy(
() -> assertThat(internalCluster().getInstance(GatewayAllocator.class, master).getNumberOfInFlightFetches(), equalTo(0))
() -> assertThat(internalCluster().getInstance(GatewayAllocator.class, clusterManager).getNumberOfInFlightFetches(), equalTo(0))
);
// kick reroute a second time and check that all shards are unassigned
assertThat(client(master).admin().cluster().prepareReroute().get().getState().getRoutingNodes().unassigned().size(), equalTo(2));
assertThat(
client(clusterManager).admin().cluster().prepareReroute().get().getState().getRoutingNodes().unassigned().size(),
equalTo(2)
);
return inSyncDataPathSettings;
}
public void testDoNotAllowStaleReplicasToBePromotedToPrimary() throws Exception {
logger.info("--> starting 3 nodes, 1 master, 2 data");
String master = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
logger.info("--> starting 3 nodes, 1 cluster-manager, 2 data");
String clusterManager = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
internalCluster().startDataOnlyNodes(2);
assertAcked(
client().admin()
@@ -213,7 +216,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
.get()
);
ensureGreen();
final Settings inSyncDataPathSettings = createStaleReplicaScenario(master);
final Settings inSyncDataPathSettings = createStaleReplicaScenario(clusterManager);
logger.info("--> starting node that reuses data folder with the up-to-date primary shard");
internalCluster().startDataOnlyNode(inSyncDataPathSettings);
@@ -291,7 +294,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
}
public void testForceStaleReplicaToBePromotedToPrimary() throws Exception {
logger.info("--> starting 3 nodes, 1 master, 2 data");
logger.info("--> starting 3 nodes, 1 cluster-manager, 2 data");
String clusterManager = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
internalCluster().startDataOnlyNodes(2);
assertAcked(
@@ -657,7 +660,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
* This test asserts that replicas failed to execute resync operations will be failed but not marked as stale.
*/
public void testPrimaryReplicaResyncFailed() throws Exception {
String master = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
String clusterManager = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
final int numberOfReplicas = between(2, 3);
final String oldPrimary = internalCluster().startDataOnlyNode();
assertAcked(
@@ -671,7 +674,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
ensureGreen();
String timeout = randomFrom("0s", "1s", "2s");
assertAcked(
client(master).admin()
client(clusterManager).admin()
.cluster()
.prepareUpdateSettings()
.setTransientSettings(Settings.builder().put("cluster.routing.allocation.enable", "none"))
@@ -700,7 +703,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(oldPrimary));
// Checks that we fails replicas in one side but not mark them as stale.
assertBusy(() -> {
ClusterState state = client(master).admin().cluster().prepareState().get().getState();
ClusterState state = client(clusterManager).admin().cluster().prepareState().get().getState();
final IndexShardRoutingTable shardRoutingTable = state.routingTable().shardRoutingTable(shardId);
final String newPrimaryNode = state.getRoutingNodes().node(shardRoutingTable.primary.currentNodeId()).node().getName();
assertThat(newPrimaryNode, not(equalTo(oldPrimary)));
@@ -712,7 +715,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
assertThat(state.metadata().index("test").inSyncAllocationIds(shardId.id()), hasSize(numberOfReplicas + 1));
}, 1, TimeUnit.MINUTES);
assertAcked(
client(master).admin()
client(clusterManager).admin()
.cluster()
.prepareUpdateSettings()
.setTransientSettings(Settings.builder().put("cluster.routing.allocation.enable", "all"))
@@ -722,7 +725,7 @@ public class PrimaryAllocationIT extends OpenSearchIntegTestCase {
partition.ensureHealthy(internalCluster());
logger.info("--> stop disrupting network and re-enable allocation");
assertBusy(() -> {
ClusterState state = client(master).admin().cluster().prepareState().get().getState();
ClusterState state = client(clusterManager).admin().cluster().prepareState().get().getState();
assertThat(state.routingTable().shardRoutingTable(shardId).activeShards(), hasSize(numberOfReplicas));
assertThat(state.metadata().index("test").inSyncAllocationIds(shardId.id()), hasSize(numberOfReplicas + 1));
for (String node : replicaNodes) {

@@ -353,7 +353,7 @@ public class DiskThresholdDeciderIT extends OpenSearchIntegTestCase {
private void assertBusyWithDiskUsageRefresh(String nodeName, String indexName, Matcher<? super Set<ShardRouting>> matcher)
throws Exception {
assertBusy(() -> {
// refresh the master's ClusterInfoService before checking the assigned shards because DiskThresholdMonitor might still
// refresh the cluster-manager's ClusterInfoService before checking the assigned shards because DiskThresholdMonitor might still
// be processing a previous ClusterInfo update and will skip the new one (see DiskThresholdMonitor#onNewInfo(ClusterInfo)
// and its internal checkInProgress flag)
refreshDiskUsage();

@@ -269,9 +269,9 @@ public class MockDiskUsagesIT extends OpenSearchIntegTestCase {
final MockInternalClusterInfoService clusterInfoService = getMockInternalClusterInfoService();
final AtomicReference<ClusterState> masterAppliedClusterState = new AtomicReference<>();
final AtomicReference<ClusterState> clusterManagerAppliedClusterState = new AtomicReference<>();
internalCluster().getCurrentMasterNodeInstance(ClusterService.class).addListener(event -> {
masterAppliedClusterState.set(event.state());
clusterManagerAppliedClusterState.set(event.state());
clusterInfoService.refresh(); // so that a subsequent reroute sees disk usage according to the current state
});
@@ -326,7 +326,7 @@ public class MockDiskUsagesIT extends OpenSearchIntegTestCase {
fsInfoPath,
1000L,
discoveryNode.getId().equals(nodeIds.get(2))
? 101L - masterAppliedClusterState.get().getRoutingNodes().node(nodeIds.get(2)).numberOfOwningShards()
? 101L - clusterManagerAppliedClusterState.get().getRoutingNodes().node(nodeIds.get(2)).numberOfOwningShards()
: 1000L
)
);
@@ -349,7 +349,7 @@ public class MockDiskUsagesIT extends OpenSearchIntegTestCase {
internalCluster().startNode(Settings.builder().put(Environment.PATH_DATA_SETTING.getKey(), createTempDir()));
}
final AtomicReference<ClusterState> masterAppliedClusterState = new AtomicReference<>();
final AtomicReference<ClusterState> clusterManagerAppliedClusterState = new AtomicReference<>();
final MockInternalClusterInfoService clusterInfoService = getMockInternalClusterInfoService();
@@ -360,7 +360,7 @@ public class MockDiskUsagesIT extends OpenSearchIntegTestCase {
internalCluster().getCurrentMasterNodeInstance(ClusterService.class).addListener(event -> {
assertThat(event.state().getRoutingNodes().node(nodeIds.get(2)).size(), lessThanOrEqualTo(1));
masterAppliedClusterState.set(event.state());
clusterManagerAppliedClusterState.set(event.state());
clusterInfoService.refresh(); // so that a subsequent reroute sees disk usage according to the current state
});
@@ -385,7 +385,7 @@ public class MockDiskUsagesIT extends OpenSearchIntegTestCase {
fsInfoPath,
1000L,
discoveryNode.getId().equals(nodeIds.get(2))
? 150L - masterAppliedClusterState.get().getRoutingNodes().node(nodeIds.get(2)).numberOfOwningShards()
? 150L - clusterManagerAppliedClusterState.get().getRoutingNodes().node(nodeIds.get(2)).numberOfOwningShards()
: 1000L
)
);

@@ -240,7 +240,7 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
node
);
}
// in case of a bridge partition, shard allocation can fail "index.allocation.max_retries" times if the master
// in case of a bridge partition, shard allocation can fail "index.allocation.max_retries" times if the cluster-manager
// is the super-connected node and recovery source and target are on opposite sides of the bridge
if (disruptionScheme instanceof NetworkDisruption
&& ((NetworkDisruption) disruptionScheme).getDisruptedLinks() instanceof Bridge) {
@@ -409,7 +409,7 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
}
}
public void testCannotJoinIfMasterLostDataFolder() throws Exception {
public void testCannotJoinIfClusterManagerLostDataFolder() throws Exception {
String clusterManagerNode = internalCluster().startClusterManagerOnlyNode();
String dataNode = internalCluster().startDataOnlyNode();
@@ -424,7 +424,7 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
return Settings.builder()
.put(ClusterBootstrapService.INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey(), nodeName)
/*
* the data node might join while the master is still not fully established as master just yet and bypasses the join
* the data node might join while the cluster-manager is still not fully established as cluster-manager just yet and bypasses the join
* validation that is done before adding the node to the cluster. Only the join validation when handling the publish
* request takes place, but at this point the cluster state has been successfully committed, and will subsequently be
* exposed to the applier. The health check below therefore sees the cluster state with the 2 nodes and thinks all is
@@ -458,34 +458,35 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
}
/**
* Tests that indices are properly deleted even if there is a master transition in between.
* Tests that indices are properly deleted even if there is a cluster-manager transition in between.
* Test for https://github.com/elastic/elasticsearch/issues/11665
*/
public void testIndicesDeleted() throws Exception {
final String idxName = "test";
final List<String> allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2);
final List<String> allClusterManagerEligibleNodes = internalCluster().startMasterOnlyNodes(2);
final String dataNode = internalCluster().startDataOnlyNode();
ensureStableCluster(3);
assertAcked(prepareCreate("test"));
final String masterNode1 = internalCluster().getMasterName();
final String clusterManagerNode1 = internalCluster().getMasterName();
NetworkDisruption networkDisruption = new NetworkDisruption(
new TwoPartitions(masterNode1, dataNode),
new TwoPartitions(clusterManagerNode1, dataNode),
NetworkDisruption.UNRESPONSIVE
);
internalCluster().setDisruptionScheme(networkDisruption);
networkDisruption.startDisrupting();
// We know this will time out due to the partition, we check manually below to not proceed until
// the delete has been applied to the master node and the master eligible node.
internalCluster().client(masterNode1).admin().indices().prepareDelete(idxName).setTimeout("0s").get();
// Don't restart the master node until we know the index deletion has taken effect on master and the master eligible node.
// the delete has been applied to the cluster-manager node and the cluster-manager eligible node.
internalCluster().client(clusterManagerNode1).admin().indices().prepareDelete(idxName).setTimeout("0s").get();
// Don't restart the cluster-manager node until we know the index deletion has taken effect on cluster-manager and the
// cluster-manager eligible node.
assertBusy(() -> {
for (String masterNode : allMasterEligibleNodes) {
final ClusterState masterState = internalCluster().clusterService(masterNode).state();
assertTrue("index not deleted on " + masterNode, masterState.metadata().hasIndex(idxName) == false);
for (String clusterManagerNode : allClusterManagerEligibleNodes) {
final ClusterState clusterManagerState = internalCluster().clusterService(clusterManagerNode).state();
assertTrue("index not deleted on " + clusterManagerNode, clusterManagerState.metadata().hasIndex(idxName) == false);
}
});
internalCluster().restartNode(masterNode1, InternalTestCluster.EMPTY_CALLBACK);
internalCluster().restartNode(clusterManagerNode1, InternalTestCluster.EMPTY_CALLBACK);
ensureYellow();
assertFalse(client().admin().indices().prepareExists(idxName).get().isExists());
}

@@ -60,20 +60,20 @@ import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.not;
/**
* Tests relating to the loss of the master.
* Tests relating to the loss of the cluster-manager.
*/
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
public class MasterDisruptionIT extends AbstractDisruptionTestCase {
public class ClusterManagerDisruptionIT extends AbstractDisruptionTestCase {
/**
* Test that cluster recovers from a long GC on master that causes other nodes to elect a new one
* Test that cluster recovers from a long GC on cluster-manager that causes other nodes to elect a new one
*/
public void testMasterNodeGCs() throws Exception {
public void testClusterManagerNodeGCs() throws Exception {
List<String> nodes = startCluster(3);
String oldClusterManagerNode = internalCluster().getMasterName();
// a very long GC, but it's OK as we remove the disruption when it has had an effect
SingleNodeDisruption masterNodeDisruption = new IntermittentLongGCDisruption(
SingleNodeDisruption clusterManagerNodeDisruption = new IntermittentLongGCDisruption(
random(),
oldClusterManagerNode,
100,
@@ -81,38 +81,40 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase {
30000,
60000
);
internalCluster().setDisruptionScheme(masterNodeDisruption);
masterNodeDisruption.startDisrupting();
internalCluster().setDisruptionScheme(clusterManagerNodeDisruption);
clusterManagerNodeDisruption.startDisrupting();
Set<String> oldNonClusterManagerNodesSet = new HashSet<>(nodes);
oldNonClusterManagerNodesSet.remove(oldClusterManagerNode);
List<String> oldNonClusterManagerNodes = new ArrayList<>(oldNonClusterManagerNodesSet);
logger.info("waiting for nodes to de-elect master [{}]", oldClusterManagerNode);
logger.info("waiting for nodes to de-elect cluster-manager [{}]", oldClusterManagerNode);
for (String node : oldNonClusterManagerNodesSet) {
assertDifferentMaster(node, oldClusterManagerNode);
}
logger.info("waiting for nodes to elect a new master");
logger.info("waiting for nodes to elect a new cluster-manager");
ensureStableCluster(2, oldNonClusterManagerNodes.get(0));
// restore GC
masterNodeDisruption.stopDisrupting();
final TimeValue waitTime = new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + masterNodeDisruption.expectedTimeToHeal().millis());
clusterManagerNodeDisruption.stopDisrupting();
final TimeValue waitTime = new TimeValue(
DISRUPTION_HEALING_OVERHEAD.millis() + clusterManagerNodeDisruption.expectedTimeToHeal().millis()
);
ensureStableCluster(3, waitTime, false, oldNonClusterManagerNodes.get(0));
// make sure all nodes agree on master
String newMaster = internalCluster().getMasterName();
assertThat(newMaster, not(equalTo(oldClusterManagerNode)));
assertMaster(newMaster, nodes);
// make sure all nodes agree on cluster-manager
String newClusterManager = internalCluster().getMasterName();
assertThat(newClusterManager, not(equalTo(oldClusterManagerNode)));
assertMaster(newClusterManager, nodes);
}
/**
* This test isolates the master from rest of the cluster, waits for a new master to be elected, restores the partition
* This test isolates the cluster-manager from rest of the cluster, waits for a new cluster-manager to be elected, restores the partition
* and verifies that all node agree on the new cluster state
*/
public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
public void testIsolateClusterManagerAndVerifyClusterStateConsensus() throws Exception {
final List<String> nodes = startCluster(3);
assertAcked(
@@ -169,7 +171,7 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase {
try {
assertEquals("unequal versions", state.version(), nodeState.version());
assertEquals("unequal node count", state.nodes().getSize(), nodeState.nodes().getSize());
assertEquals("different masters ", state.nodes().getMasterNodeId(), nodeState.nodes().getMasterNodeId());
assertEquals("different cluster-managers ", state.nodes().getMasterNodeId(), nodeState.nodes().getMasterNodeId());
assertEquals("different meta data version", state.metadata().version(), nodeState.metadata().version());
assertEquals("different routing", state.routingTable().toString(), nodeState.routingTable().toString());
} catch (AssertionError t) {
@@ -193,7 +195,7 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase {
}
/**
* Verify that the proper block is applied when nodes lose their master
* Verify that the proper block is applied when nodes lose their cluster-manager
*/
public void testVerifyApiBlocksDuringPartition() throws Exception {
internalCluster().startNodes(3, Settings.builder().putNull(NoMasterBlockService.NO_CLUSTER_MANAGER_BLOCK_SETTING.getKey()).build());
@@ -221,13 +223,13 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase {
// Simulate a network issue between the unlucky node and the rest of the cluster.
networkDisruption.startDisrupting();
// The unlucky node must report *no* master node, since it can't connect to master and in fact it should
// The unlucky node must report *no* cluster-manager node, since it can't connect to cluster-manager and in fact it should
// continuously ping until network failures have been resolved. However
// It may a take a bit before the node detects it has been cut off from the elected master
// It may a take a bit before the node detects it has been cut off from the elected cluster-manager
logger.info("waiting for isolated node [{}] to have no cluster-manager", isolatedNode);
assertNoMaster(isolatedNode, NoMasterBlockService.NO_MASTER_BLOCK_WRITES, TimeValue.timeValueSeconds(30));
logger.info("wait until elected master has been removed and a new 2 node cluster was from (via [{}])", isolatedNode);
logger.info("wait until elected cluster-manager has been removed and a new 2 node cluster was from (via [{}])", isolatedNode);
ensureStableCluster(2, nonIsolatedNode);
for (String node : partitions.getMajoritySide()) {
@@ -251,7 +253,7 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase {
networkDisruption.stopDisrupting();
// Wait until the master node sees al 3 nodes again.
// Wait until the cluster-manager node sees al 3 nodes again.
ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkDisruption.expectedTimeToHeal().millis()));
logger.info(
@@ -267,9 +269,9 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase {
networkDisruption.startDisrupting();
// The unlucky node must report *no* master node, since it can't connect to master and in fact it should
// The unlucky node must report *no* cluster-manager node, since it can't connect to cluster-manager and in fact it should
// continuously ping until network failures have been resolved. However
// It may a take a bit before the node detects it has been cut off from the elected master
// It may a take a bit before the node detects it has been cut off from the elected cluster-manager
logger.info("waiting for isolated node [{}] to have no cluster-manager", isolatedNode);
assertNoMaster(isolatedNode, NoMasterBlockService.NO_MASTER_BLOCK_ALL, TimeValue.timeValueSeconds(30));

@@ -69,22 +69,26 @@ public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase {
DiscoveryNodes discoveryNodes = internalCluster().getInstance(ClusterService.class, nonClusterManagerNode).state().nodes();
TransportService masterTranspotService = internalCluster().getInstance(
TransportService clusterManagerTranspotService = internalCluster().getInstance(
TransportService.class,
discoveryNodes.getMasterNode().getName()
);
logger.info("blocking requests from non master [{}] to master [{}]", nonClusterManagerNode, clusterManagerNode);
MockTransportService nonMasterTransportService = (MockTransportService) internalCluster().getInstance(
logger.info("blocking requests from non cluster-manager [{}] to cluster-manager [{}]", nonClusterManagerNode, clusterManagerNode);
MockTransportService nonClusterManagerTransportService = (MockTransportService) internalCluster().getInstance(
TransportService.class,
nonClusterManagerNode
);
nonMasterTransportService.addFailToSendNoConnectRule(masterTranspotService);
nonClusterManagerTransportService.addFailToSendNoConnectRule(clusterManagerTranspotService);
assertNoMaster(nonClusterManagerNode);
logger.info("blocking cluster state publishing from master [{}] to non master [{}]", clusterManagerNode, nonClusterManagerNode);
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(
logger.info(
"blocking cluster state publishing from cluster-manager [{}] to non cluster-manager [{}]",
clusterManagerNode,
nonClusterManagerNode
);
MockTransportService clusterManagerTransportService = (MockTransportService) internalCluster().getInstance(
TransportService.class,
clusterManagerNode
);
@@ -93,31 +97,40 @@ public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase {
discoveryNodes.getLocalNode().getName()
);
if (randomBoolean()) {
masterTransportService.addFailToSendNoConnectRule(localTransportService, PublicationTransportHandler.PUBLISH_STATE_ACTION_NAME);
clusterManagerTransportService.addFailToSendNoConnectRule(
localTransportService,
PublicationTransportHandler.PUBLISH_STATE_ACTION_NAME
);
} else {
masterTransportService.addFailToSendNoConnectRule(localTransportService, PublicationTransportHandler.COMMIT_STATE_ACTION_NAME);
clusterManagerTransportService.addFailToSendNoConnectRule(
localTransportService,
PublicationTransportHandler.COMMIT_STATE_ACTION_NAME
);
}
logger.info(
"allowing requests from non master [{}] to master [{}], waiting for two join request",
"allowing requests from non cluster-manager [{}] to cluster-manager [{}], waiting for two join request",
nonClusterManagerNode,
clusterManagerNode
);
final CountDownLatch countDownLatch = new CountDownLatch(2);
nonMasterTransportService.addSendBehavior(masterTransportService, (connection, requestId, action, request, options) -> {
if (action.equals(JoinHelper.JOIN_ACTION_NAME)) {
countDownLatch.countDown();
nonClusterManagerTransportService.addSendBehavior(
clusterManagerTransportService,
(connection, requestId, action, request, options) -> {
if (action.equals(JoinHelper.JOIN_ACTION_NAME)) {
countDownLatch.countDown();
}
connection.sendRequest(requestId, action, request, options);
}
connection.sendRequest(requestId, action, request, options);
});
);
nonMasterTransportService.addConnectBehavior(masterTransportService, Transport::openConnection);
nonClusterManagerTransportService.addConnectBehavior(clusterManagerTransportService, Transport::openConnection);
countDownLatch.await();
logger.info("waiting for cluster to reform");
masterTransportService.clearOutboundRules(localTransportService);
nonMasterTransportService.clearOutboundRules(localTransportService);
clusterManagerTransportService.clearOutboundRules(localTransportService);
nonClusterManagerTransportService.clearOutboundRules(localTransportService);
ensureStableCluster(2);
@@ -141,7 +154,7 @@ public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase {
ensureStableCluster(3);
}
public void testElectMasterWithLatestVersion() throws Exception {
public void testElectClusterManagerWithLatestVersion() throws Exception {
final Set<String> nodes = new HashSet<>(internalCluster().startNodes(3));
ensureStableCluster(3);
ServiceDisruptionScheme isolateAllNodes = new NetworkDisruption(
@@ -150,22 +163,22 @@ public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase {
);
internalCluster().setDisruptionScheme(isolateAllNodes);
logger.info("--> forcing a complete election to make sure \"preferred\" master is elected");
logger.info("--> forcing a complete election to make sure \"preferred\" cluster-manager is elected");
isolateAllNodes.startDisrupting();
for (String node : nodes) {
assertNoMaster(node);
}
internalCluster().clearDisruptionScheme();
ensureStableCluster(3);
final String preferredMasterName = internalCluster().getMasterName();
final DiscoveryNode preferredMaster = internalCluster().clusterService(preferredMasterName).localNode();
final String preferredClusterManagerName = internalCluster().getMasterName();
final DiscoveryNode preferredClusterManager = internalCluster().clusterService(preferredClusterManagerName).localNode();
logger.info("--> preferred master is {}", preferredMaster);
logger.info("--> preferred cluster-manager is {}", preferredClusterManager);
final Set<String> nonPreferredNodes = new HashSet<>(nodes);
nonPreferredNodes.remove(preferredMasterName);
final ServiceDisruptionScheme isolatePreferredMaster = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
internalCluster().setDisruptionScheme(isolatePreferredMaster);
isolatePreferredMaster.startDisrupting();
nonPreferredNodes.remove(preferredClusterManagerName);
final ServiceDisruptionScheme isolatePreferredClusterManager = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
internalCluster().setDisruptionScheme(isolatePreferredClusterManager);
isolatePreferredClusterManager.startDisrupting();
client(randomFrom(nonPreferredNodes)).admin()
.indices()
@@ -194,11 +207,11 @@ public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase {
}
/**
* Adds an asymmetric break between a master and one of the nodes and makes
* Adds an asymmetric break between a cluster-manager and one of the nodes and makes
* sure that the node is removed form the cluster, that the node start pinging and that
* the cluster reforms when healed.
*/
public void testNodeNotReachableFromMaster() throws Exception {
public void testNodeNotReachableFromClusterManager() throws Exception {
startCluster(3);
String clusterManagerNode = internalCluster().getMasterName();
@@ -210,15 +223,19 @@ public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase {
}
}
logger.info("blocking request from master [{}] to [{}]", clusterManagerNode, nonClusterManagerNode);
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(
logger.info("blocking request from cluster-manager [{}] to [{}]", clusterManagerNode, nonClusterManagerNode);
MockTransportService clusterManagerTransportService = (MockTransportService) internalCluster().getInstance(
TransportService.class,
clusterManagerNode
);
if (randomBoolean()) {
masterTransportService.addUnresponsiveRule(internalCluster().getInstance(TransportService.class, nonClusterManagerNode));
clusterManagerTransportService.addUnresponsiveRule(
internalCluster().getInstance(TransportService.class, nonClusterManagerNode)
);
} else {
masterTransportService.addFailToSendNoConnectRule(internalCluster().getInstance(TransportService.class, nonClusterManagerNode));
clusterManagerTransportService.addFailToSendNoConnectRule(
internalCluster().getInstance(TransportService.class, nonClusterManagerNode)
);
}
logger.info("waiting for [{}] to be removed from cluster", nonClusterManagerNode);
@@ -228,7 +245,7 @@ public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase {
assertNoMaster(nonClusterManagerNode);
logger.info("healing partition and checking cluster reforms");
masterTransportService.clearAllRules();
clusterManagerTransportService.clearAllRules();
ensureStableCluster(3);
}

@@ -94,12 +94,12 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
createRepository("test-repo", "fs");
final String masterNode1 = internalCluster().getMasterName();
final String clusterManagerNode1 = internalCluster().getMasterName();
NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.UNRESPONSIVE);
NetworkDisruption networkDisruption = isolateClusterManagerDisruption(NetworkDisruption.UNRESPONSIVE);
internalCluster().setDisruptionScheme(networkDisruption);
ClusterService clusterService = internalCluster().clusterService(masterNode1);
ClusterService clusterService = internalCluster().clusterService(clusterManagerNode1);
CountDownLatch disruptionStarted = new CountDownLatch(1);
clusterService.addListener(new ClusterStateListener() {
@Override
@@ -124,7 +124,7 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
final String snapshot = "test-snap";
logger.info("--> starting snapshot");
ActionFuture<CreateSnapshotResponse> future = client(masterNode1).admin()
ActionFuture<CreateSnapshotResponse> future = client(clusterManagerNode1).admin()
.cluster()
.prepareCreateSnapshot("test-repo", snapshot)
.setWaitForCompletion(true)
@@ -147,7 +147,7 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
logger.info("--> stopping disrupting");
networkDisruption.stopDisrupting();
ensureStableCluster(4, masterNode1);
ensureStableCluster(4, clusterManagerNode1);
logger.info("--> done");
try {
@@ -158,7 +158,7 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
assertNotNull(sne);
assertThat(
sne.getMessage(),
either(endsWith(" Failed to update cluster state during snapshot finalization")).or(endsWith(" no longer master"))
either(endsWith(" Failed to update cluster state during snapshot finalization")).or(endsWith(" no longer cluster-manager"))
);
assertThat(sne.getSnapshotName(), is(snapshot));
}
@@ -177,13 +177,13 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
final String repoName = "test-repo";
createRepository(repoName, "mock");
final String masterNode = internalCluster().getMasterName();
final String clusterManagerNode = internalCluster().getMasterName();
blockAllDataNodes(repoName);
final String snapshot = "test-snap";
logger.info("--> starting snapshot");
ActionFuture<CreateSnapshotResponse> future = client(masterNode).admin()
ActionFuture<CreateSnapshotResponse> future = client(clusterManagerNode).admin()
.cluster()
.prepareCreateSnapshot(repoName, snapshot)
.setWaitForCompletion(true)
@@ -191,7 +191,7 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
waitForBlockOnAnyDataNode(repoName, TimeValue.timeValueSeconds(10L));
NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
NetworkDisruption networkDisruption = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
internalCluster().setDisruptionScheme(networkDisruption);
networkDisruption.startDisrupting();
@@ -203,7 +203,7 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
networkDisruption.stopDisrupting();
unblockAllDataNodes(repoName);
ensureStableCluster(2, masterNode);
ensureStableCluster(2, clusterManagerNode);
logger.info("--> done");
logger.info("--> recreate the index with potentially different shard counts");
@@ -213,17 +213,17 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
logger.info("--> run a snapshot that fails to finalize but succeeds on the data node");
blockMasterFromFinalizingSnapshotOnIndexFile(repoName);
final ActionFuture<CreateSnapshotResponse> snapshotFuture = client(masterNode).admin()
final ActionFuture<CreateSnapshotResponse> snapshotFuture = client(clusterManagerNode).admin()
.cluster()
.prepareCreateSnapshot(repoName, "snapshot-2")
.setWaitForCompletion(true)
.execute();
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(10L));
unblockNode(repoName, masterNode);
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(10L));
unblockNode(repoName, clusterManagerNode);
assertFutureThrows(snapshotFuture, SnapshotException.class);
logger.info("--> create a snapshot expected to be successful");
final CreateSnapshotResponse successfulSnapshot = client(masterNode).admin()
final CreateSnapshotResponse successfulSnapshot = client(clusterManagerNode).admin()
.cluster()
.prepareCreateSnapshot(repoName, "snapshot-2")
.setWaitForCompletion(true)
@@ -235,7 +235,7 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
assertAcked(client().admin().cluster().prepareDeleteSnapshot(repoName, "snapshot-2").get());
}
public void testMasterFailOverDuringShardSnapshots() throws Exception {
public void testClusterManagerFailOverDuringShardSnapshots() throws Exception {
internalCluster().startMasterOnlyNodes(3);
final String dataNode = internalCluster().startDataOnlyNode();
ensureStableCluster(4);
@@ -258,7 +258,7 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L));
final NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
final NetworkDisruption networkDisruption = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
internalCluster().setDisruptionScheme(networkDisruption);
networkDisruption.startDisrupting();
ensureStableCluster(3, dataNode);
@@ -267,7 +267,7 @@ public class SnapshotDisruptionIT extends AbstractSnapshotIntegTestCase {
networkDisruption.stopDisrupting();
awaitNoMoreRunningOperations(dataNode);
logger.info("--> make sure isolated master responds to snapshot request");
logger.info("--> make sure isolated cluster-manager responds to snapshot request");
final SnapshotException sne = expectThrows(
SnapshotException.class,
() -> snapshotResponse.actionGet(TimeValue.timeValueSeconds(30L))

@ -77,7 +77,7 @@ import static org.hamcrest.Matchers.equalTo;
|
|||
* not detect a cluster-manager failure too quickly.
|
||||
*/
|
||||
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
|
||||
public class StableMasterDisruptionIT extends OpenSearchIntegTestCase {
|
||||
public class StableClusterManagerDisruptionIT extends OpenSearchIntegTestCase {
|
||||
|
||||
@Override
|
||||
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||
|
@ -87,43 +87,43 @@ public class StableMasterDisruptionIT extends OpenSearchIntegTestCase {
|
|||
/**
|
||||
* Test that no split brain occurs under partial network partition. See https://github.com/elastic/elasticsearch/issues/2488
|
||||
*/
|
||||
public void testFailWithMinimumMasterNodesConfigured() throws Exception {
|
||||
public void testFailWithMinimumClusterManagerNodesConfigured() throws Exception {
|
||||
List<String> nodes = internalCluster().startNodes(3);
|
||||
ensureStableCluster(3);
|
||||
|
||||
// Figure out what is the elected master node
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
logger.info("---> legit elected master node={}", masterNode);
|
||||
// Figure out what is the elected cluster-manager node
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
logger.info("---> legit elected cluster-manager node={}", clusterManagerNode);
|
||||
|
||||
// Pick a node that isn't the elected master.
|
||||
Set<String> nonMasters = new HashSet<>(nodes);
|
||||
nonMasters.remove(masterNode);
|
||||
final String unluckyNode = randomFrom(nonMasters.toArray(Strings.EMPTY_ARRAY));
|
||||
// Pick a node that isn't the elected cluster-manager.
|
||||
Set<String> nonClusterManagers = new HashSet<>(nodes);
|
||||
nonClusterManagers.remove(clusterManagerNode);
|
||||
final String unluckyNode = randomFrom(nonClusterManagers.toArray(Strings.EMPTY_ARRAY));
|
||||
|
||||
// Simulate a network issue between the unlucky node and elected master node in both directions.
|
||||
// Simulate a network issue between the unlucky node and elected cluster-manager node in both directions.
|
||||
|
||||
NetworkDisruption networkDisconnect = new NetworkDisruption(
|
||||
new NetworkDisruption.TwoPartitions(masterNode, unluckyNode),
|
||||
new NetworkDisruption.TwoPartitions(clusterManagerNode, unluckyNode),
|
||||
NetworkDisruption.DISCONNECT
|
||||
);
|
||||
setDisruptionScheme(networkDisconnect);
|
||||
networkDisconnect.startDisrupting();
|
||||
|
||||
// Wait until elected master has removed the unlucky node...
|
||||
ensureStableCluster(2, masterNode);
|
||||
// Wait until elected cluster-manager has removed the unlucky node...
|
||||
ensureStableCluster(2, clusterManagerNode);
|
||||
|
||||
// The unlucky node must report *no* master node, since it can't connect to master and in fact it should
|
||||
// The unlucky node must report *no* cluster-manager node, since it can't connect to cluster-manager and in fact it should
|
||||
// continuously ping until network failures have been resolved. However
|
||||
// It may take a bit before the node detects it has been cut off from the elected master
|
||||
// It may take a bit before the node detects it has been cut off from the elected cluster-manager
|
||||
ensureNoMaster(unluckyNode);
|
||||
|
||||
networkDisconnect.stopDisrupting();
|
||||
|
||||
// Wait until the master node sees all 3 nodes again.
|
||||
// Wait until the cluster-manager node sees all 3 nodes again.
|
||||
ensureStableCluster(3);
|
||||
|
||||
// The elected master shouldn't have changed, since the unlucky node never could have elected itself as master
|
||||
assertThat(internalCluster().getMasterName(), equalTo(masterNode));
|
||||
// The elected cluster-manager shouldn't have changed, since the unlucky node never could have elected itself as cluster-manager
|
||||
assertThat(internalCluster().getMasterName(), equalTo(clusterManagerNode));
|
||||
}
|
||||
|
||||
private void ensureNoMaster(String node) throws Exception {
|
||||
|
@@ -135,17 +135,17 @@ public class StableMasterDisruptionIT extends OpenSearchIntegTestCase {
|
|||
}
|
||||
|
||||
/**
|
||||
* Verify that nodes fault detection detects a disconnected node after master reelection
|
||||
* Verify that nodes fault detection detects a disconnected node after cluster-manager reelection
|
||||
*/
|
||||
public void testFollowerCheckerDetectsDisconnectedNodeAfterMasterReelection() throws Exception {
|
||||
testFollowerCheckerAfterMasterReelection(NetworkDisruption.DISCONNECT, Settings.EMPTY);
|
||||
public void testFollowerCheckerDetectsDisconnectedNodeAfterClusterManagerReelection() throws Exception {
|
||||
testFollowerCheckerAfterClusterManagerReelection(NetworkDisruption.DISCONNECT, Settings.EMPTY);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that nodes fault detection detects an unresponsive node after master reelection
|
||||
* Verify that nodes fault detection detects an unresponsive node after cluster-manager reelection
|
||||
*/
|
||||
public void testFollowerCheckerDetectsUnresponsiveNodeAfterMasterReelection() throws Exception {
|
||||
testFollowerCheckerAfterMasterReelection(
|
||||
public void testFollowerCheckerDetectsUnresponsiveNodeAfterClusterManagerReelection() throws Exception {
|
||||
testFollowerCheckerAfterClusterManagerReelection(
|
||||
NetworkDisruption.UNRESPONSIVE,
|
||||
Settings.builder()
|
||||
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s")
|
||||
|
@@ -156,34 +156,34 @@ public class StableMasterDisruptionIT extends OpenSearchIntegTestCase {
|
|||
);
|
||||
}
|
||||
|
||||
private void testFollowerCheckerAfterMasterReelection(NetworkLinkDisruptionType networkLinkDisruptionType, Settings settings)
|
||||
private void testFollowerCheckerAfterClusterManagerReelection(NetworkLinkDisruptionType networkLinkDisruptionType, Settings settings)
|
||||
throws Exception {
|
||||
internalCluster().startNodes(4, settings);
|
||||
ensureStableCluster(4);
|
||||
|
||||
logger.info("--> stopping current master");
|
||||
logger.info("--> stopping current cluster-manager");
|
||||
internalCluster().stopCurrentMasterNode();
|
||||
|
||||
ensureStableCluster(3);
|
||||
|
||||
final String master = internalCluster().getMasterName();
|
||||
final List<String> nonMasters = Arrays.stream(internalCluster().getNodeNames())
|
||||
.filter(n -> master.equals(n) == false)
|
||||
final String clusterManager = internalCluster().getMasterName();
|
||||
final List<String> nonClusterManagers = Arrays.stream(internalCluster().getNodeNames())
|
||||
.filter(n -> clusterManager.equals(n) == false)
|
||||
.collect(Collectors.toList());
|
||||
final String isolatedNode = randomFrom(nonMasters);
|
||||
final String otherNode = nonMasters.get(nonMasters.get(0).equals(isolatedNode) ? 1 : 0);
|
||||
final String isolatedNode = randomFrom(nonClusterManagers);
|
||||
final String otherNode = nonClusterManagers.get(nonClusterManagers.get(0).equals(isolatedNode) ? 1 : 0);
|
||||
|
||||
logger.info("--> isolating [{}]", isolatedNode);
|
||||
|
||||
final NetworkDisruption networkDisruption = new NetworkDisruption(
|
||||
new TwoPartitions(singleton(isolatedNode), Sets.newHashSet(master, otherNode)),
|
||||
new TwoPartitions(singleton(isolatedNode), Sets.newHashSet(clusterManager, otherNode)),
|
||||
networkLinkDisruptionType
|
||||
);
|
||||
setDisruptionScheme(networkDisruption);
|
||||
networkDisruption.startDisrupting();
|
||||
|
||||
logger.info("--> waiting for master to remove it");
|
||||
ensureStableCluster(2, master);
|
||||
logger.info("--> waiting for cluster-manager to remove it");
|
||||
ensureStableCluster(2, clusterManager);
|
||||
ensureNoMaster(isolatedNode);
|
||||
|
||||
networkDisruption.stopDisrupting();
|
||||
|
@@ -191,10 +191,10 @@ public class StableMasterDisruptionIT extends OpenSearchIntegTestCase {
|
|||
}
|
||||
|
||||
/**
|
||||
* Test that emulates a frozen elected master node that unfreezes and pushes its cluster state to other nodes that already are
|
||||
* following another elected master node. These nodes should reject this cluster state and prevent them from following the stale master.
|
||||
* Test that emulates a frozen elected cluster-manager node that unfreezes and pushes its cluster state to other nodes that already are
|
||||
* following another elected cluster-manager node. These nodes should reject this cluster state and prevent them from following the stale cluster-manager.
|
||||
*/
|
||||
public void testStaleMasterNotHijackingMajority() throws Exception {
|
||||
public void testStaleClusterManagerNotHijackingMajority() throws Exception {
|
||||
final List<String> nodes = internalCluster().startNodes(
|
||||
3,
|
||||
Settings.builder()
|
||||
|
@@ -204,60 +204,63 @@ public class StableMasterDisruptionIT extends OpenSearchIntegTestCase {
|
|||
);
|
||||
ensureStableCluster(3);
|
||||
|
||||
// Save the current master node as old master node, because that node will get frozen
|
||||
final String oldMasterNode = internalCluster().getMasterName();
|
||||
// Save the current cluster-manager node as old cluster-manager node, because that node will get frozen
|
||||
final String oldClusterManagerNode = internalCluster().getMasterName();
|
||||
|
||||
// Simulating a painful gc by suspending all threads for a long time on the current elected master node.
|
||||
SingleNodeDisruption masterNodeDisruption = new LongGCDisruption(random(), oldMasterNode);
|
||||
// Simulating a painful gc by suspending all threads for a long time on the current elected cluster-manager node.
|
||||
SingleNodeDisruption clusterManagerNodeDisruption = new LongGCDisruption(random(), oldClusterManagerNode);
|
||||
|
||||
// Save the majority side
|
||||
final List<String> majoritySide = new ArrayList<>(nodes);
|
||||
majoritySide.remove(oldMasterNode);
|
||||
majoritySide.remove(oldClusterManagerNode);
|
||||
|
||||
// Keeps track of the previous and current master when a master node transition took place on each node on the majority side:
|
||||
final Map<String, List<Tuple<String, String>>> masters = Collections.synchronizedMap(new HashMap<>());
|
||||
// Keeps track of the previous and current cluster-manager when a cluster-manager node transition took place on each node on the
|
||||
// majority side:
|
||||
final Map<String, List<Tuple<String, String>>> clusterManagers = Collections.synchronizedMap(new HashMap<>());
|
||||
for (final String node : majoritySide) {
|
||||
masters.put(node, new ArrayList<>());
|
||||
clusterManagers.put(node, new ArrayList<>());
|
||||
internalCluster().getInstance(ClusterService.class, node).addListener(event -> {
|
||||
DiscoveryNode previousMaster = event.previousState().nodes().getMasterNode();
|
||||
DiscoveryNode currentMaster = event.state().nodes().getMasterNode();
|
||||
if (!Objects.equals(previousMaster, currentMaster)) {
|
||||
DiscoveryNode previousClusterManager = event.previousState().nodes().getMasterNode();
|
||||
DiscoveryNode currentClusterManager = event.state().nodes().getMasterNode();
|
||||
if (!Objects.equals(previousClusterManager, currentClusterManager)) {
|
||||
logger.info(
|
||||
"--> node {} received new cluster state: {} \n and had previous cluster state: {}",
|
||||
node,
|
||||
event.state(),
|
||||
event.previousState()
|
||||
);
|
||||
String previousClusterManagerNodeName = previousMaster != null ? previousMaster.getName() : null;
|
||||
String currentMasterNodeName = currentMaster != null ? currentMaster.getName() : null;
|
||||
masters.get(node).add(new Tuple<>(previousClusterManagerNodeName, currentMasterNodeName));
|
||||
String previousClusterManagerNodeName = previousClusterManager != null ? previousClusterManager.getName() : null;
|
||||
String currentClusterManagerNodeName = currentClusterManager != null ? currentClusterManager.getName() : null;
|
||||
clusterManagers.get(node).add(new Tuple<>(previousClusterManagerNodeName, currentClusterManagerNodeName));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
final CountDownLatch oldMasterNodeSteppedDown = new CountDownLatch(1);
|
||||
internalCluster().getInstance(ClusterService.class, oldMasterNode).addListener(event -> {
|
||||
final CountDownLatch oldClusterManagerNodeSteppedDown = new CountDownLatch(1);
|
||||
internalCluster().getInstance(ClusterService.class, oldClusterManagerNode).addListener(event -> {
|
||||
if (event.state().nodes().getMasterNodeId() == null) {
|
||||
oldMasterNodeSteppedDown.countDown();
|
||||
oldClusterManagerNodeSteppedDown.countDown();
|
||||
}
|
||||
});
|
||||
|
||||
internalCluster().setDisruptionScheme(masterNodeDisruption);
|
||||
logger.info("--> freezing node [{}]", oldMasterNode);
|
||||
masterNodeDisruption.startDisrupting();
|
||||
internalCluster().setDisruptionScheme(clusterManagerNodeDisruption);
|
||||
logger.info("--> freezing node [{}]", oldClusterManagerNode);
|
||||
clusterManagerNodeDisruption.startDisrupting();
|
||||
|
||||
// Wait for majority side to elect a new master
|
||||
// Wait for majority side to elect a new cluster-manager
|
||||
assertBusy(() -> {
|
||||
for (final Map.Entry<String, List<Tuple<String, String>>> entry : masters.entrySet()) {
|
||||
for (final Map.Entry<String, List<Tuple<String, String>>> entry : clusterManagers.entrySet()) {
|
||||
final List<Tuple<String, String>> transitions = entry.getValue();
|
||||
assertTrue(entry.getKey() + ": " + transitions, transitions.stream().anyMatch(transition -> transition.v2() != null));
|
||||
}
|
||||
});
|
||||
|
||||
// The old master node is frozen, but here we submit a cluster state update task that doesn't get executed, but will be queued and
|
||||
// once the old master node un-freezes it gets executed. The old master node will send this update + the cluster state where it is
|
||||
// flagged as master to the other nodes that follow the new master. These nodes should ignore this update.
|
||||
internalCluster().getInstance(ClusterService.class, oldMasterNode)
|
||||
// The old cluster-manager node is frozen, but here we submit a cluster state update task that doesn't get executed, but will be
|
||||
// queued and
|
||||
// once the old cluster-manager node un-freezes it gets executed. The old cluster-manager node will send this update + the cluster
|
||||
// state where it is
|
||||
// flagged as cluster-manager to the other nodes that follow the new cluster-manager. These nodes should ignore this update.
|
||||
internalCluster().getInstance(ClusterService.class, oldClusterManagerNode)
|
||||
.submitStateUpdateTask("sneaky-update", new ClusterStateUpdateTask(Priority.IMMEDIATE) {
|
||||
@Override
|
||||
public ClusterState execute(ClusterState currentState) {
|
||||
|
@@ -270,25 +273,30 @@ public class StableMasterDisruptionIT extends OpenSearchIntegTestCase {
|
|||
}
|
||||
});
|
||||
|
||||
// Save the new elected master node
|
||||
final String newMasterNode = internalCluster().getMasterName(majoritySide.get(0));
|
||||
logger.info("--> new detected master node [{}]", newMasterNode);
|
||||
// Save the new elected cluster-manager node
|
||||
final String newClusterManagerNode = internalCluster().getMasterName(majoritySide.get(0));
|
||||
logger.info("--> new detected cluster-manager node [{}]", newClusterManagerNode);
|
||||
|
||||
// Stop disruption
|
||||
logger.info("--> unfreezing node [{}]", oldMasterNode);
|
||||
masterNodeDisruption.stopDisrupting();
|
||||
logger.info("--> unfreezing node [{}]", oldClusterManagerNode);
|
||||
clusterManagerNodeDisruption.stopDisrupting();
|
||||
|
||||
oldMasterNodeSteppedDown.await(30, TimeUnit.SECONDS);
|
||||
logger.info("--> [{}] stepped down as master", oldMasterNode);
|
||||
oldClusterManagerNodeSteppedDown.await(30, TimeUnit.SECONDS);
|
||||
logger.info("--> [{}] stepped down as cluster-manager", oldClusterManagerNode);
|
||||
ensureStableCluster(3);
|
||||
|
||||
assertThat(masters.size(), equalTo(2));
|
||||
for (Map.Entry<String, List<Tuple<String, String>>> entry : masters.entrySet()) {
|
||||
assertThat(clusterManagers.size(), equalTo(2));
|
||||
for (Map.Entry<String, List<Tuple<String, String>>> entry : clusterManagers.entrySet()) {
|
||||
String nodeName = entry.getKey();
|
||||
List<Tuple<String, String>> transitions = entry.getValue();
|
||||
assertTrue(
|
||||
"[" + nodeName + "] should not apply state from old master [" + oldMasterNode + "] but it did: " + transitions,
|
||||
transitions.stream().noneMatch(t -> oldMasterNode.equals(t.v2()))
|
||||
"["
|
||||
+ nodeName
|
||||
+ "] should not apply state from old cluster-manager ["
|
||||
+ oldClusterManagerNode
|
||||
+ "] but it did: "
|
||||
+ transitions,
|
||||
transitions.stream().noneMatch(t -> oldClusterManagerNode.equals(t.v2()))
|
||||
);
|
||||
}
|
||||
}
|
|
@@ -79,10 +79,10 @@ public class NodeEnvironmentIT extends OpenSearchIntegTestCase {
|
|||
);
|
||||
}
|
||||
|
||||
logger.info("--> restarting the node without the data and master roles");
|
||||
logger.info("--> restarting the node without the data and cluster-manager roles");
|
||||
IllegalStateException ex = expectThrows(
|
||||
IllegalStateException.class,
|
||||
"node not having the data and master roles while having existing index metadata must fail",
|
||||
"node not having the data and cluster-manager roles while having existing index metadata must fail",
|
||||
() -> internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
|
||||
@Override
|
||||
public Settings onNodeStopped(String nodeName) {
|
||||
|
@@ -100,7 +100,7 @@ public class NodeEnvironmentIT extends OpenSearchIntegTestCase {
|
|||
assertThat(ex.getMessage(), startsWith("node does not have the data role but has shard data"));
|
||||
}
|
||||
|
||||
logger.info("--> start the node again with data and master roles");
|
||||
logger.info("--> start the node again with data and cluster-manager roles");
|
||||
internalCluster().startNode(dataPathSettings);
|
||||
|
||||
logger.info("--> indexing a simple document");
|
||||
|
|
|
@@ -121,8 +121,8 @@ public class NodeRepurposeCommandIT extends OpenSearchIntegTestCase {
|
|||
|
||||
executeRepurposeCommand(noClusterManagerNoDataSettingsForClusterManagerNode, 1, 0);
|
||||
|
||||
// by restarting as master and data node, we can check that the index definition was really deleted and also that the tool
|
||||
// does not mess things up so much that the nodes cannot boot as master or data node any longer.
|
||||
// by restarting as cluster-manager and data node, we can check that the index definition was really deleted and also that the tool
|
||||
// does not mess things up so much that the nodes cannot boot as cluster-manager or data node any longer.
|
||||
internalCluster().startClusterManagerOnlyNode(clusterManagerNodeDataPathSettings);
|
||||
internalCluster().startDataOnlyNode(dataNodeDataPathSettings);
|
||||
|
||||
|
|
|
@@ -241,16 +241,16 @@ public class GatewayIndexStateIT extends OpenSearchIntegTestCase {
|
|||
client().prepareIndex("test").setId("2").setSource("field1", "value1").execute().actionGet();
|
||||
}
|
||||
|
||||
public void testJustMasterNode() throws Exception {
|
||||
public void testJustClusterManagerNode() throws Exception {
|
||||
logger.info("--> cleaning nodes");
|
||||
|
||||
logger.info("--> starting 1 master node non data");
|
||||
logger.info("--> starting 1 cluster-manager node non data");
|
||||
internalCluster().startNode(nonDataNode());
|
||||
|
||||
logger.info("--> create an index");
|
||||
client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).execute().actionGet();
|
||||
|
||||
logger.info("--> restarting master node");
|
||||
logger.info("--> restarting cluster-manager node");
|
||||
internalCluster().fullRestart(new RestartCallback() {
|
||||
@Override
|
||||
public Settings onNodeStopped(String nodeName) {
|
||||
|
@@ -273,10 +273,10 @@ public class GatewayIndexStateIT extends OpenSearchIntegTestCase {
|
|||
assertThat(clusterStateResponse.getState().metadata().hasIndex("test"), equalTo(true));
|
||||
}
|
||||
|
||||
public void testJustMasterNodeAndJustDataNode() {
|
||||
public void testJustClusterManagerNodeAndJustDataNode() {
|
||||
logger.info("--> cleaning nodes");
|
||||
|
||||
logger.info("--> starting 1 master node non data");
|
||||
logger.info("--> starting 1 cluster-manager node non data");
|
||||
internalCluster().startClusterManagerOnlyNode();
|
||||
internalCluster().startDataOnlyNode();
|
||||
|
||||
|
|
|
@@ -60,18 +60,18 @@ import static org.hamcrest.Matchers.equalTo;
|
|||
public class MetadataNodesIT extends OpenSearchIntegTestCase {
|
||||
public void testMetaWrittenAlsoOnDataNode() throws Exception {
|
||||
// this test checks that index state is written on data only nodes if they have a shard allocated
|
||||
String masterNode = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
|
||||
String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
|
||||
String dataNode = internalCluster().startDataOnlyNode(Settings.EMPTY);
|
||||
assertAcked(prepareCreate("test").setSettings(Settings.builder().put("index.number_of_replicas", 0)));
|
||||
index("test", "_doc", "1", jsonBuilder().startObject().field("text", "some text").endObject());
|
||||
ensureGreen("test");
|
||||
assertIndexInMetaState(dataNode, "test");
|
||||
assertIndexInMetaState(masterNode, "test");
|
||||
assertIndexInMetaState(clusterManagerNode, "test");
|
||||
}
|
||||
|
||||
public void testIndexFilesAreRemovedIfAllShardsFromIndexRemoved() throws Exception {
|
||||
// this test checks that the index data is removed from a data only node once all shards have been allocated away from it
|
||||
String masterNode = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
|
||||
String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
|
||||
List<String> nodeNames = internalCluster().startDataOnlyNodes(2);
|
||||
String node1 = nodeNames.get(0);
|
||||
String node2 = nodeNames.get(1);
|
||||
|
@@ -90,8 +90,8 @@ public class MetadataNodesIT extends OpenSearchIntegTestCase {
|
|||
Index resolveIndex = resolveIndex(index);
|
||||
assertIndexDirectoryExists(node1, resolveIndex);
|
||||
assertIndexDirectoryDeleted(node2, resolveIndex);
|
||||
assertIndexInMetaState(masterNode, index);
|
||||
assertIndexDirectoryDeleted(masterNode, resolveIndex);
|
||||
assertIndexInMetaState(clusterManagerNode, index);
|
||||
assertIndexDirectoryDeleted(clusterManagerNode, resolveIndex);
|
||||
|
||||
logger.debug("relocating index...");
|
||||
client().admin()
|
||||
|
@@ -104,8 +104,8 @@ public class MetadataNodesIT extends OpenSearchIntegTestCase {
|
|||
assertIndexDirectoryDeleted(node1, resolveIndex);
|
||||
assertIndexInMetaState(node2, index);
|
||||
assertIndexDirectoryExists(node2, resolveIndex);
|
||||
assertIndexInMetaState(masterNode, index);
|
||||
assertIndexDirectoryDeleted(masterNode, resolveIndex);
|
||||
assertIndexInMetaState(clusterManagerNode, index);
|
||||
assertIndexDirectoryDeleted(clusterManagerNode, resolveIndex);
|
||||
|
||||
client().admin().indices().prepareDelete(index).get();
|
||||
assertIndexDirectoryDeleted(node1, resolveIndex);
|
||||
|
@@ -114,7 +114,7 @@ public class MetadataNodesIT extends OpenSearchIntegTestCase {
|
|||
|
||||
@SuppressWarnings("unchecked")
|
||||
public void testMetaWrittenWhenIndexIsClosedAndMetaUpdated() throws Exception {
|
||||
String masterNode = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
|
||||
String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
|
||||
final String dataNode = internalCluster().startDataOnlyNode(Settings.EMPTY);
|
||||
|
||||
final String index = "index";
|
||||
|
@@ -123,7 +123,7 @@ public class MetadataNodesIT extends OpenSearchIntegTestCase {
|
|||
ensureGreen();
|
||||
logger.info("--> wait for meta state written for index");
|
||||
assertIndexInMetaState(dataNode, index);
|
||||
assertIndexInMetaState(masterNode, index);
|
||||
assertIndexInMetaState(clusterManagerNode, index);
|
||||
|
||||
logger.info("--> close index");
|
||||
client().admin().indices().prepareClose(index).get();
|
||||
|
|
|
@@ -127,12 +127,12 @@ public class RecoverAfterNodesIT extends OpenSearchIntegTestCase {
|
|||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, clientNode3).isEmpty(), equalTo(true));
|
||||
}
|
||||
|
||||
public void testRecoverAfterMasterNodes() throws Exception {
|
||||
public void testRecoverAfterClusterManagerNodes() throws Exception {
|
||||
internalCluster().setBootstrapClusterManagerNodeIndex(0);
|
||||
logger.info("--> start master_node (1)");
|
||||
Client master1 = startNode(Settings.builder().put("gateway.recover_after_master_nodes", 2).put(clusterManagerOnlyNode()));
|
||||
logger.info("--> start cluster_manager_node (1)");
|
||||
Client clusterManager1 = startNode(Settings.builder().put("gateway.recover_after_master_nodes", 2).put(clusterManagerOnlyNode()));
|
||||
assertThat(
|
||||
master1.admin()
|
||||
clusterManager1.admin()
|
||||
.cluster()
|
||||
.prepareState()
|
||||
.setLocal(true)
|
||||
|
@@ -147,7 +147,7 @@ public class RecoverAfterNodesIT extends OpenSearchIntegTestCase {
|
|||
logger.info("--> start data_node (1)");
|
||||
Client data1 = startNode(Settings.builder().put("gateway.recover_after_master_nodes", 2).put(dataOnlyNode()));
|
||||
assertThat(
|
||||
master1.admin()
|
||||
clusterManager1.admin()
|
||||
.cluster()
|
||||
.prepareState()
|
||||
.setLocal(true)
|
||||
|
@@ -174,7 +174,7 @@ public class RecoverAfterNodesIT extends OpenSearchIntegTestCase {
|
|||
logger.info("--> start data_node (2)");
|
||||
Client data2 = startNode(Settings.builder().put("gateway.recover_after_master_nodes", 2).put(dataOnlyNode()));
|
||||
assertThat(
|
||||
master1.admin()
|
||||
clusterManager1.admin()
|
||||
.cluster()
|
||||
.prepareState()
|
||||
.setLocal(true)
|
||||
|
@@ -210,20 +210,20 @@ public class RecoverAfterNodesIT extends OpenSearchIntegTestCase {
|
|||
hasItem(GatewayService.STATE_NOT_RECOVERED_BLOCK)
|
||||
);
|
||||
|
||||
logger.info("--> start master_node (2)");
|
||||
Client master2 = startNode(Settings.builder().put("gateway.recover_after_master_nodes", 2).put(clusterManagerOnlyNode()));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, master1).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, master2).isEmpty(), equalTo(true));
|
||||
logger.info("--> start cluster_manager_node (2)");
|
||||
Client clusterManager2 = startNode(Settings.builder().put("gateway.recover_after_master_nodes", 2).put(clusterManagerOnlyNode()));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, clusterManager1).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, clusterManager2).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, data1).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, data2).isEmpty(), equalTo(true));
|
||||
}
|
||||
|
||||
public void testRecoverAfterDataNodes() throws Exception {
|
||||
internalCluster().setBootstrapClusterManagerNodeIndex(0);
|
||||
logger.info("--> start master_node (1)");
|
||||
Client master1 = startNode(Settings.builder().put("gateway.recover_after_data_nodes", 2).put(clusterManagerOnlyNode()));
|
||||
logger.info("--> start cluster_manager_node (1)");
|
||||
Client clusterManager1 = startNode(Settings.builder().put("gateway.recover_after_data_nodes", 2).put(clusterManagerOnlyNode()));
|
||||
assertThat(
|
||||
master1.admin()
|
||||
clusterManager1.admin()
|
||||
.cluster()
|
||||
.prepareState()
|
||||
.setLocal(true)
|
||||
|
@@ -238,7 +238,7 @@ public class RecoverAfterNodesIT extends OpenSearchIntegTestCase {
|
|||
logger.info("--> start data_node (1)");
|
||||
Client data1 = startNode(Settings.builder().put("gateway.recover_after_data_nodes", 2).put(dataOnlyNode()));
|
||||
assertThat(
|
||||
master1.admin()
|
||||
clusterManager1.admin()
|
||||
.cluster()
|
||||
.prepareState()
|
||||
.setLocal(true)
|
||||
|
@@ -262,10 +262,10 @@ public class RecoverAfterNodesIT extends OpenSearchIntegTestCase {
|
|||
hasItem(GatewayService.STATE_NOT_RECOVERED_BLOCK)
|
||||
);
|
||||
|
||||
logger.info("--> start master_node (2)");
|
||||
Client master2 = startNode(Settings.builder().put("gateway.recover_after_data_nodes", 2).put(clusterManagerOnlyNode()));
|
||||
logger.info("--> start cluster_manager_node (2)");
|
||||
Client clusterManager2 = startNode(Settings.builder().put("gateway.recover_after_data_nodes", 2).put(clusterManagerOnlyNode()));
|
||||
assertThat(
|
||||
master2.admin()
|
||||
clusterManager2.admin()
|
||||
.cluster()
|
||||
.prepareState()
|
||||
.setLocal(true)
|
||||
|
@@ -289,7 +289,7 @@ public class RecoverAfterNodesIT extends OpenSearchIntegTestCase {
|
|||
hasItem(GatewayService.STATE_NOT_RECOVERED_BLOCK)
|
||||
);
|
||||
assertThat(
|
||||
master2.admin()
|
||||
clusterManager2.admin()
|
||||
.cluster()
|
||||
.prepareState()
|
||||
.setLocal(true)
|
||||
|
@@ -303,8 +303,8 @@ public class RecoverAfterNodesIT extends OpenSearchIntegTestCase {
|
|||
|
||||
logger.info("--> start data_node (2)");
|
||||
Client data2 = startNode(Settings.builder().put("gateway.recover_after_data_nodes", 2).put(dataOnlyNode()));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, master1).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, master2).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, clusterManager1).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, clusterManager2).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, data1).isEmpty(), equalTo(true));
|
||||
assertThat(waitForNoBlocksOnNode(BLOCK_WAIT_TIMEOUT, data2).isEmpty(), equalTo(true));
|
||||
}
|
||||
|
|
|
@@ -445,7 +445,7 @@ public class RecoveryFromGatewayIT extends OpenSearchIntegTestCase {
|
|||
.setSource(jsonBuilder().startObject().field("field", "value3").endObject())
|
||||
.execute()
|
||||
.actionGet();
|
||||
// TODO: remove once refresh doesn't fail immediately if there is a master block:
|
||||
// TODO: remove once refresh doesn't fail immediately if there is a cluster-manager block:
|
||||
// https://github.com/elastic/elasticsearch/issues/9997
|
||||
// client().admin().cluster().prepareHealth("test").setWaitForYellowStatus().get();
|
||||
logger.info("--> refreshing all indices after indexing is complete");
|
||||
|
@@ -665,7 +665,7 @@ public class RecoveryFromGatewayIT extends OpenSearchIntegTestCase {
|
|||
}
|
||||
|
||||
public void testStartedShardFoundIfStateNotYetProcessed() throws Exception {
|
||||
// nodes may need to report the shards they processed the initial recovered cluster state from the master
|
||||
// nodes may need to report the shards they processed the initial recovered cluster state from the cluster-manager
|
||||
final String nodeName = internalCluster().startNode();
|
||||
createIndex("test", Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).build());
|
||||
final String customDataPath = IndexMetadata.INDEX_DATA_PATH_SETTING.get(
|
||||
|
|
|
@@ -76,7 +76,7 @@ public class DynamicMappingIT extends OpenSearchIntegTestCase {
|
|||
assertThat(e.getMessage(), Matchers.containsString("failed to parse field [foo] of type [long]"));
|
||||
} catch (IllegalArgumentException e) {
|
||||
// rare case: the node that processes the index request doesn't have the mappings
|
||||
// yet and sends a mapping update to the master node to map "bar" as "text". This
|
||||
// yet and sends a mapping update to the cluster-manager node to map "bar" as "text". This
|
||||
// fails as it had been already mapped as a long by the previous index request.
|
||||
assertThat(e.getMessage(), Matchers.containsString("mapper [foo] cannot be changed from type [long] to [text]"));
|
||||
}
|
||||
|
@@ -140,19 +140,19 @@ public class DynamicMappingIT extends OpenSearchIntegTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testPreflightCheckAvoidsMaster() throws InterruptedException {
|
||||
public void testPreflightCheckAvoidsClusterManager() throws InterruptedException {
|
||||
createIndex("index", Settings.builder().put(INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), 2).build());
|
||||
ensureGreen("index");
|
||||
client().prepareIndex("index").setId("1").setSource("field1", "value1").get();
|
||||
|
||||
final CountDownLatch masterBlockedLatch = new CountDownLatch(1);
|
||||
final CountDownLatch clusterManagerBlockedLatch = new CountDownLatch(1);
|
||||
final CountDownLatch indexingCompletedLatch = new CountDownLatch(1);
|
||||
|
||||
internalCluster().getInstance(ClusterService.class, internalCluster().getMasterName())
|
||||
.submitStateUpdateTask("block-state-updates", new ClusterStateUpdateTask() {
|
||||
@Override
|
||||
public ClusterState execute(ClusterState currentState) throws Exception {
|
||||
masterBlockedLatch.countDown();
|
||||
clusterManagerBlockedLatch.countDown();
|
||||
indexingCompletedLatch.await();
|
||||
return currentState;
|
||||
}
|
||||
|
@@ -163,7 +163,7 @@ public class DynamicMappingIT extends OpenSearchIntegTestCase {
|
|||
}
|
||||
});
|
||||
|
||||
masterBlockedLatch.await();
|
||||
clusterManagerBlockedLatch.await();
|
||||
final IndexRequestBuilder indexRequestBuilder = client().prepareIndex("index").setId("2").setSource("field2", "value2");
|
||||
try {
|
||||
assertThat(
|
||||
|
|
|
@@ -38,7 +38,7 @@ import static org.opensearch.test.OpenSearchIntegTestCase.ClusterScope;
|
|||
import static org.opensearch.test.OpenSearchIntegTestCase.Scope;
|
||||
|
||||
@ClusterScope(scope = Scope.TEST, numDataNodes = 0)
|
||||
public class DedicatedMasterGetFieldMappingIT extends SimpleGetFieldMappingsIT {
|
||||
public class DedicatedClusterManagerGetFieldMappingIT extends SimpleGetFieldMappingsIT {
|
||||
|
||||
@Before
|
||||
public void before1() throws Exception {
|
|
@@ -386,13 +386,13 @@ public class UpdateMappingIntegrationIT extends OpenSearchIntegTestCase {
|
|||
assertNotNull("field " + fieldName + " doesn't exists on " + node, fieldType);
|
||||
}
|
||||
}
|
||||
assertMappingOnMaster(index, fieldNames);
|
||||
assertMappingOnClusterManager(index, fieldNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* Waits for the given mapping type to exist on the master node.
|
||||
* Waits for the given mapping type to exist on the cluster-manager node.
|
||||
*/
|
||||
private void assertMappingOnMaster(final String index, final String... fieldNames) {
|
||||
private void assertMappingOnClusterManager(final String index, final String... fieldNames) {
|
||||
GetMappingsResponse response = client().admin().indices().prepareGetMappings(index).get();
|
||||
MappingMetadata mappings = response.getMappings().get(index);
|
||||
assertThat(mappings, notNullValue());
|
||||
|
|
|
@@ -852,7 +852,7 @@ public class IndexRecoveryIT extends OpenSearchIntegTestCase {
|
|||
.put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "500ms")
|
||||
.put(RecoverySettings.INDICES_RECOVERY_INTERNAL_ACTION_TIMEOUT_SETTING.getKey(), "10s")
|
||||
.build();
|
||||
// start a master node
|
||||
// start a cluster-manager node
|
||||
internalCluster().startNode(nodeSettings);
|
||||
|
||||
final String blueNodeName = internalCluster().startNode(
|
||||
|
@@ -1054,7 +1054,7 @@ public class IndexRecoveryIT extends OpenSearchIntegTestCase {
|
|||
.put(RecoverySettings.INDICES_RECOVERY_INTERNAL_ACTION_TIMEOUT_SETTING.getKey(), "1s")
|
||||
.put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "1s")
|
||||
.build();
|
||||
// start a master node
|
||||
// start a cluster-manager node
|
||||
internalCluster().startNode(nodeSettings);
|
||||
|
||||
final String blueNodeName = internalCluster().startNode(
|
||||
|
@@ -1211,8 +1211,8 @@ public class IndexRecoveryIT extends OpenSearchIntegTestCase {
|
|||
)
|
||||
.build();
|
||||
TimeValue disconnectAfterDelay = TimeValue.timeValueMillis(randomIntBetween(0, 100));
|
||||
// start a master node
|
||||
String masterNodeName = internalCluster().startClusterManagerOnlyNode(nodeSettings);
|
||||
// start a cluster-manager node
|
||||
String clusterManagerNodeName = internalCluster().startClusterManagerOnlyNode(nodeSettings);
|
||||
|
||||
final String blueNodeName = internalCluster().startNode(
|
||||
Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()
|
||||
|
@@ -1239,9 +1239,9 @@ public class IndexRecoveryIT extends OpenSearchIntegTestCase {
|
|||
ensureSearchable(indexName);
|
||||
assertHitCount(client().prepareSearch(indexName).get(), numDocs);
|
||||
|
||||
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(
|
||||
MockTransportService clusterManagerTransportService = (MockTransportService) internalCluster().getInstance(
|
||||
TransportService.class,
|
||||
masterNodeName
|
||||
clusterManagerNodeName
|
||||
);
|
||||
MockTransportService blueMockTransportService = (MockTransportService) internalCluster().getInstance(
|
||||
TransportService.class,
|
||||
|
@@ -1312,7 +1312,7 @@ public class IndexRecoveryIT extends OpenSearchIntegTestCase {
|
|||
});
|
||||
|
||||
for (MockTransportService mockTransportService : Arrays.asList(redMockTransportService, blueMockTransportService)) {
|
||||
mockTransportService.addSendBehavior(masterTransportService, (connection, requestId, action, request, options) -> {
|
||||
mockTransportService.addSendBehavior(clusterManagerTransportService, (connection, requestId, action, request, options) -> {
|
||||
logger.info("--> sending request {} on {}", action, connection.getNode());
|
||||
if ((primaryRelocation && finalized.get()) == false) {
|
||||
assertNotEquals(action, ShardStateAction.SHARD_FAILED_ACTION_NAME);
|
||||
|
@@ -1466,8 +1466,8 @@ public class IndexRecoveryIT extends OpenSearchIntegTestCase {
|
|||
assertHitCount(client().prepareSearch().get(), numDocs);
|
||||
}
|
||||
|
||||
/** Makes sure the new master does not repeatedly fetch index metadata from recovering replicas */
|
||||
public void testOngoingRecoveryAndMasterFailOver() throws Exception {
|
||||
/** Makes sure the new cluster-manager does not repeatedly fetch index metadata from recovering replicas */
|
||||
public void testOngoingRecoveryAndClusterManagerFailOver() throws Exception {
|
||||
String indexName = "test";
|
||||
internalCluster().startNodes(2);
|
||||
String nodeWithPrimary = internalCluster().startDataOnlyNode();
|
||||
|
|
|
@@ -147,7 +147,7 @@ public class CloseWhileRelocatingShardsIT extends OpenSearchIntegTestCase {
|
|||
);
|
||||
|
||||
final String targetNode = internalCluster().startDataOnlyNode();
|
||||
ensureClusterSizeConsistency(); // wait for the master to finish processing join.
|
||||
ensureClusterSizeConsistency(); // wait for the cluster-manager to finish processing join.
|
||||
|
||||
try {
|
||||
final ClusterService clusterService = internalCluster().getInstance(ClusterService.class, internalCluster().getMasterName());
|
||||
|
|
|
@@ -105,7 +105,7 @@ public class IndicesStoreIntegrationIT extends OpenSearchIntegTestCase {
|
|||
|
||||
@Override
|
||||
protected void ensureClusterStateConsistency() throws IOException {
|
||||
// testShardActiveElseWhere might change the state of a non-master node
|
||||
// testShardActiveElseWhere might change the state of a non-cluster-manager node
|
||||
// so we cannot check state consistency of this cluster
|
||||
}
|
||||
|
||||
|
|
|
@@ -91,7 +91,7 @@ public class IngestProcessorNotInstalledOnAllNodesIT extends OpenSearchIntegTest
|
|||
}
|
||||
}
|
||||
|
||||
public void testFailPipelineCreationProcessorNotInstalledOnMasterNode() throws Exception {
|
||||
public void testFailPipelineCreationProcessorNotInstalledOnClusterManagerNode() throws Exception {
|
||||
internalCluster().startNode();
|
||||
installPlugin = true;
|
||||
internalCluster().startNode();
|
||||
|
|
|
@@ -403,7 +403,7 @@ public class PersistentTasksExecutorIT extends OpenSearchIntegTestCase {
|
|||
|
||||
PlainActionFuture<PersistentTask<?>> unassignmentFuture = new PlainActionFuture<>();
|
||||
|
||||
// Disallow re-assignment after it is unallocated to verify master and node state
|
||||
// Disallow re-assignment after it is unallocated to verify cluster-manager and node state
|
||||
TestPersistentTasksExecutor.setNonClusterStateCondition(false);
|
||||
|
||||
persistentTasksClusterService.unassignPersistentTask(taskId, task.getAllocationId() + 1, "unassignment test", unassignmentFuture);
|
||||
|
|
|
@@ -199,7 +199,7 @@ public class FullRollingRestartIT extends OpenSearchIntegTestCase {
|
|||
internalCluster().startClusterManagerOnlyNode(Settings.EMPTY);
|
||||
internalCluster().startDataOnlyNodes(3);
|
||||
/**
|
||||
* We start 3 nodes and a dedicated master. Restart one of the data-nodes and ensure that we got no relocations.
|
||||
* We start 3 nodes and a dedicated cluster-manager. Restart one of the data-nodes and ensure that we got no relocations.
|
||||
* Yet we have 6 shards 0 replica so that means if the restarting node comes back both other nodes are subject
|
||||
* to relocating to the restarting node since all had 2 shards and now one node has nothing allocated.
|
||||
* We have a fix for this to wait until we have allocated unallocated shards now so this shouldn't happen.
|
||||
|
|
|
@@ -51,11 +51,11 @@ import static org.hamcrest.Matchers.is;
|
|||
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
|
||||
public class BlobStoreRepositoryCleanupIT extends AbstractSnapshotIntegTestCase {
|
||||
|
||||
public void testMasterFailoverDuringCleanup() throws Exception {
|
||||
public void testClusterManagerFailoverDuringCleanup() throws Exception {
|
||||
startBlockedCleanup("test-repo");
|
||||
|
||||
final int nodeCount = internalCluster().numDataAndMasterNodes();
|
||||
logger.info("--> stopping master node");
|
||||
logger.info("--> stopping cluster-manager node");
|
||||
internalCluster().stopCurrentMasterNode();
|
||||
|
||||
ensureStableCluster(nodeCount - 1);
|
||||
|
@@ -67,7 +67,7 @@ public class BlobStoreRepositoryCleanupIT extends AbstractSnapshotIntegTestCase
|
|||
}
|
||||
|
||||
public void testRepeatCleanupsDontRemove() throws Exception {
|
||||
final String masterNode = startBlockedCleanup("test-repo");
|
||||
final String clusterManagerNode = startBlockedCleanup("test-repo");
|
||||
|
||||
logger.info("--> sending another cleanup");
|
||||
assertFutureThrows(client().admin().cluster().prepareCleanupRepository("test-repo").execute(), IllegalStateException.class);
|
||||
|
@@ -81,8 +81,8 @@ public class BlobStoreRepositoryCleanupIT extends AbstractSnapshotIntegTestCase
|
|||
.custom(RepositoryCleanupInProgress.TYPE);
|
||||
assertTrue(cleanup.hasCleanupInProgress());
|
||||
|
||||
logger.info("--> unblocking master node");
|
||||
unblockNode("test-repo", masterNode);
|
||||
logger.info("--> unblocking cluster-manager node");
|
||||
unblockNode("test-repo", clusterManagerNode);
|
||||
|
||||
logger.info("--> wait for cleanup to finish and disappear from cluster state");
|
||||
awaitClusterState(
|
||||
|
@@ -91,7 +91,7 @@ public class BlobStoreRepositoryCleanupIT extends AbstractSnapshotIntegTestCase
|
|||
}
|
||||
|
||||
private String startBlockedCleanup(String repoName) throws Exception {
|
||||
logger.info("--> starting two master nodes and one data node");
|
||||
logger.info("--> starting two cluster-manager nodes and one data node");
|
||||
internalCluster().startMasterOnlyNodes(2);
|
||||
internalCluster().startDataOnlyNodes(1);
|
||||
|
||||
|
@@ -117,17 +117,17 @@ public class BlobStoreRepositoryCleanupIT extends AbstractSnapshotIntegTestCase
|
|||
);
|
||||
garbageFuture.get();
|
||||
|
||||
final String masterNode = blockMasterFromFinalizingSnapshotOnIndexFile(repoName);
|
||||
final String clusterManagerNode = blockMasterFromFinalizingSnapshotOnIndexFile(repoName);
|
||||
|
||||
logger.info("--> starting repository cleanup");
|
||||
client().admin().cluster().prepareCleanupRepository(repoName).execute();
|
||||
|
||||
logger.info("--> waiting for block to kick in on " + masterNode);
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(60));
|
||||
logger.info("--> waiting for block to kick in on " + clusterManagerNode);
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(60));
|
||||
awaitClusterState(
|
||||
state -> state.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY).hasCleanupInProgress()
|
||||
);
|
||||
return masterNode;
|
||||
return clusterManagerNode;
|
||||
}
|
||||
|
||||
public void testCleanupOldIndexN() throws ExecutionException, InterruptedException {
|
||||
|
|
|
@@ -234,7 +234,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
public void testLongRunningCloneAllowsConcurrentSnapshot() throws Exception {
|
||||
// large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations
|
||||
final String masterNode = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
final String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
createRepository(repoName, "mock");
|
||||
|
@@ -245,9 +245,9 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
createFullSnapshot(repoName, sourceSnapshot);
|
||||
|
||||
final String targetSnapshot = "target-snapshot";
|
||||
blockMasterOnShardClone(repoName);
|
||||
blockClusterManagerOnShardClone(repoName);
|
||||
final ActionFuture<AcknowledgedResponse> cloneFuture = startClone(repoName, sourceSnapshot, targetSnapshot, indexSlow);
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
final String indexFast = "index-fast";
|
||||
createIndexWithRandomDocs(indexFast, randomIntBetween(20, 100));
|
||||
|
@@ -257,7 +257,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
);
|
||||
|
||||
assertThat(cloneFuture.isDone(), is(false));
|
||||
unblockNode(repoName, masterNode);
|
||||
unblockNode(repoName, clusterManagerNode);
|
||||
|
||||
assertAcked(cloneFuture.get());
|
||||
}
|
||||
|
@@ -323,7 +323,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
public void testBackToBackClonesForIndexNotInCluster() throws Exception {
|
||||
// large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations
|
||||
final String masterNode = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
final String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
createRepository(repoName, "mock");
|
||||
|
@@ -336,9 +336,9 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
assertAcked(admin().indices().prepareDelete(indexBlocked).get());
|
||||
|
||||
final String targetSnapshot1 = "target-snapshot";
|
||||
blockMasterOnShardClone(repoName);
|
||||
blockClusterManagerOnShardClone(repoName);
|
||||
final ActionFuture<AcknowledgedResponse> cloneFuture1 = startClone(repoName, sourceSnapshot, targetSnapshot1, indexBlocked);
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
assertThat(cloneFuture1.isDone(), is(false));
|
||||
|
||||
final int extraClones = randomIntBetween(1, 5);
|
||||
|
@@ -366,7 +366,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
assertFalse(extraSnapshotFuture.isDone());
|
||||
}
|
||||
|
||||
unblockNode(repoName, masterNode);
|
||||
unblockNode(repoName, clusterManagerNode);
|
||||
assertAcked(cloneFuture1.get());
|
||||
|
||||
for (ActionFuture<AcknowledgedResponse> extraCloneFuture : extraCloneFutures) {
|
||||
|
@@ -377,7 +377,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testMasterFailoverDuringCloneStep1() throws Exception {
|
||||
public void testClusterManagerFailoverDuringCloneStep1() throws Exception {
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
|
@@ -388,13 +388,13 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
final String sourceSnapshot = "source-snapshot";
|
||||
createFullSnapshot(repoName, sourceSnapshot);
|
||||
|
||||
blockMasterOnReadIndexMeta(repoName);
|
||||
blockClusterManagerOnReadIndexMeta(repoName);
|
||||
final String cloneName = "target-snapshot";
|
||||
final ActionFuture<AcknowledgedResponse> cloneFuture = startCloneFromDataNode(repoName, sourceSnapshot, cloneName, testIndex);
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
internalCluster().restartNode(masterNode);
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
internalCluster().restartNode(clusterManagerNode);
|
||||
boolean cloneSucceeded = false;
|
||||
try {
|
||||
cloneFuture.actionGet(TimeValue.timeValueSeconds(30L));
|
||||
|
@@ -406,7 +406,8 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
awaitNoMoreRunningOperations(internalCluster().getMasterName());
|
||||
|
||||
// Check if the clone operation worked out by chance as a result of the clone request being retried because of the master failover
|
||||
// Check if the clone operation worked out by chance as a result of the clone request being retried
|
||||
// because of the cluster-manager failover
|
||||
cloneSucceeded = cloneSucceeded
|
||||
|| getRepositoryData(repoName).getSnapshotIds().stream().anyMatch(snapshotId -> snapshotId.getName().equals(cloneName));
|
||||
assertAllSnapshotsSuccessful(getRepositoryData(repoName), cloneSucceeded ? 2 : 1);
|
||||
|
@@ -430,7 +431,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
);
|
||||
}
|
||||
|
||||
public void testMasterFailoverDuringCloneStep2() throws Exception {
|
||||
public void testClusterManagerFailoverDuringCloneStep2() throws Exception {
|
||||
// large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations
|
||||
internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
internalCluster().startDataOnlyNode();
|
||||
|
@@ -443,12 +444,12 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
createFullSnapshot(repoName, sourceSnapshot);
|
||||
|
||||
final String targetSnapshot = "target-snapshot";
|
||||
blockMasterOnShardClone(repoName);
|
||||
blockClusterManagerOnShardClone(repoName);
|
||||
final ActionFuture<AcknowledgedResponse> cloneFuture = startCloneFromDataNode(repoName, sourceSnapshot, targetSnapshot, testIndex);
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
internalCluster().restartNode(masterNode);
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
internalCluster().restartNode(clusterManagerNode);
|
||||
expectThrows(SnapshotException.class, cloneFuture::actionGet);
|
||||
awaitNoMoreRunningOperations(internalCluster().getMasterName());
|
||||
|
||||
|
@@ -471,9 +472,9 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
blockMasterFromFinalizingSnapshotOnSnapFile(repoName);
|
||||
final ActionFuture<AcknowledgedResponse> cloneFuture = startCloneFromDataNode(repoName, sourceSnapshot, targetSnapshot, testIndex);
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
unblockNode(repoName, masterNode);
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
unblockNode(repoName, clusterManagerNode);
|
||||
expectThrows(SnapshotException.class, cloneFuture::actionGet);
|
||||
awaitNoMoreRunningOperations(internalCluster().getMasterName());
|
||||
assertAllSnapshotsSuccessful(getRepositoryData(repoName), 1);
|
||||
|
@@ -490,8 +491,8 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
final String sourceSnapshot = "source-snapshot";
|
||||
blockDataNode(repoName, dataNode);
|
||||
final Client masterClient = internalCluster().masterClient();
|
||||
final ActionFuture<CreateSnapshotResponse> sourceSnapshotFuture = masterClient.admin()
|
||||
final Client clusterManagerClient = internalCluster().masterClient();
|
||||
final ActionFuture<CreateSnapshotResponse> sourceSnapshotFuture = clusterManagerClient.admin()
|
||||
.cluster()
|
||||
.prepareCreateSnapshot(repoName, sourceSnapshot)
|
||||
.setWaitForCompletion(true)
|
||||
|
@@ -503,7 +504,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
final SnapshotException sne = expectThrows(
|
||||
SnapshotException.class,
|
||||
() -> startClone(masterClient, repoName, sourceSnapshot, "target-snapshot", testIndex).actionGet(
|
||||
() -> startClone(clusterManagerClient, repoName, sourceSnapshot, "target-snapshot", testIndex).actionGet(
|
||||
TimeValue.timeValueSeconds(30L)
|
||||
)
|
||||
);
|
||||
|
@@ -516,7 +517,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
}
|
||||
|
||||
public void testStartSnapshotWithSuccessfulShardClonePendingFinalization() throws Exception {
|
||||
final String masterName = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
final String dataNode = internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
createRepository(repoName, "mock");
|
||||
|
@@ -530,15 +531,15 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
blockMasterOnWriteIndexFile(repoName);
|
||||
final String cloneName = "clone-blocked";
|
||||
final ActionFuture<AcknowledgedResponse> blockedClone = startClone(repoName, sourceSnapshot, cloneName, indexName);
|
||||
waitForBlock(masterName, repoName, TimeValue.timeValueSeconds(30L));
|
||||
waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L));
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
blockNodeOnAnyFiles(repoName, dataNode);
|
||||
final ActionFuture<CreateSnapshotResponse> otherSnapshot = startFullSnapshot(repoName, "other-snapshot");
|
||||
awaitNumberOfSnapshotsInProgress(2);
|
||||
assertFalse(blockedClone.isDone());
|
||||
unblockNode(repoName, masterName);
|
||||
unblockNode(repoName, clusterManagerName);
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
awaitMasterFinishRepoOperations();
|
||||
awaitClusterManagerFinishRepoOperations();
|
||||
unblockNode(repoName, dataNode);
|
||||
assertAcked(blockedClone.get());
|
||||
assertEquals(getSnapshot(repoName, cloneName).state(), SnapshotState.SUCCESS);
|
||||
|
@@ -568,7 +569,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
assertFalse(blockedClone.isDone());
|
||||
unblockNode(repoName, clusterManagerName);
|
||||
awaitNoMoreRunningOperations(clusterManagerName);
|
||||
awaitMasterFinishRepoOperations();
|
||||
awaitClusterManagerFinishRepoOperations();
|
||||
assertAcked(blockedClone.get());
|
||||
assertAcked(otherClone.get());
|
||||
assertEquals(getSnapshot(repoName, cloneName).state(), SnapshotState.SUCCESS);
|
||||
|
@@ -576,7 +577,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
}
|
||||
|
||||
public void testStartCloneWithSuccessfulShardSnapshotPendingFinalization() throws Exception {
|
||||
final String masterName = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
createRepository(repoName, "mock");
|
||||
|
@@ -589,7 +590,7 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
blockMasterOnWriteIndexFile(repoName);
|
||||
final ActionFuture<CreateSnapshotResponse> blockedSnapshot = startFullSnapshot(repoName, "snap-blocked");
|
||||
waitForBlock(masterName, repoName, TimeValue.timeValueSeconds(30L));
|
||||
waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L));
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
final String cloneName = "clone";
|
||||
final ActionFuture<AcknowledgedResponse> clone = startClone(repoName, sourceSnapshot, cloneName, indexName);
|
||||
|
@@ -602,11 +603,11 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
});
|
||||
assertFalse(blockedSnapshot.isDone());
|
||||
} finally {
|
||||
unblockNode(repoName, masterName);
|
||||
unblockNode(repoName, clusterManagerName);
|
||||
}
|
||||
awaitNoMoreRunningOperations();
|
||||
|
||||
awaitMasterFinishRepoOperations();
|
||||
awaitClusterManagerFinishRepoOperations();
|
||||
|
||||
assertSuccessful(blockedSnapshot);
|
||||
assertAcked(clone.get());
|
||||
|
@@ -641,12 +642,12 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
|
|||
return client.admin().cluster().prepareCloneSnapshot(repoName, sourceSnapshot, targetSnapshot).setIndices(indices).execute();
|
||||
}
|
||||
|
||||
private void blockMasterOnReadIndexMeta(String repoName) {
|
||||
private void blockClusterManagerOnReadIndexMeta(String repoName) {
|
||||
((MockRepository) internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName))
|
||||
.setBlockOnReadIndexMeta();
|
||||
}
|
||||
|
||||
private void blockMasterOnShardClone(String repoName) {
|
||||
private void blockClusterManagerOnShardClone(String repoName) {
|
||||
((MockRepository) internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName))
|
||||
.setBlockOnWriteShardLevelMeta();
|
||||
}
|
||||
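For orientation, the renamed block helpers above follow the same block/wait/unblock pattern used throughout CloneSnapshotIT. The lines below are a minimal usage sketch, not part of this commit; repoName, sourceSnapshot and indexName are assumed to come from the surrounding test setup.

    // illustrative only: pair a cluster-manager-side block with waitForBlock/unblockNode
    final String clusterManagerName = internalCluster().getMasterName();
    blockClusterManagerOnReadIndexMeta(repoName);                                // helper renamed in this change
    final ActionFuture<AcknowledgedResponse> clone = startClone(repoName, sourceSnapshot, "clone-target", indexName);
    waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L)); // wait until the mock repository blocks
    unblockNode(repoName, clusterManagerName);                                   // release the block so the clone can complete
    assertAcked(clone.get());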
|
|
|
@@ -280,7 +280,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
}
|
||||
|
||||
public void testMultipleReposAreIndependent3() throws Exception {
|
||||
final String masterNode = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
final String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
internalCluster().startDataOnlyNode();
|
||||
final String blockedRepoName = "test-repo-blocked";
|
||||
final String otherRepoName = "test-repo";
|
||||
|
@@ -289,14 +289,14 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
createIndexWithContent("test-index");
|
||||
|
||||
createFullSnapshot(blockedRepoName, "blocked-snapshot");
|
||||
blockNodeOnAnyFiles(blockedRepoName, masterNode);
|
||||
blockNodeOnAnyFiles(blockedRepoName, clusterManagerNode);
|
||||
final ActionFuture<AcknowledgedResponse> slowDeleteFuture = startDeleteSnapshot(blockedRepoName, "*");
|
||||
|
||||
logger.info("--> waiting for concurrent snapshot(s) to finish");
|
||||
createNSnapshots(otherRepoName, randomIntBetween(1, 5));
|
||||
assertAcked(startDeleteSnapshot(otherRepoName, "*").get());
|
||||
|
||||
unblockNode(blockedRepoName, masterNode);
|
||||
unblockNode(blockedRepoName, clusterManagerNode);
|
||||
assertAcked(slowDeleteFuture.actionGet());
|
||||
}
|
||||
|
||||
|
@@ -447,7 +447,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
assertThat(client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(), empty());
|
||||
}
|
||||
|
||||
public void testMasterFailOverWithQueuedDeletes() throws Exception {
|
||||
public void testClusterManagerFailOverWithQueuedDeletes() throws Exception {
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
final String dataNode = internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
|
@@ -458,7 +458,10 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
final String firstSnapshot = "snapshot-one";
|
||||
blockDataNode(repoName, dataNode);
|
||||
final ActionFuture<CreateSnapshotResponse> firstSnapshotResponse = startFullSnapshotFromNonMasterClient(repoName, firstSnapshot);
|
||||
final ActionFuture<CreateSnapshotResponse> firstSnapshotResponse = startFullSnapshotFromNonClusterManagerClient(
|
||||
repoName,
|
||||
firstSnapshot
|
||||
);
|
||||
waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
final String dataNode2 = internalCluster().startDataOnlyNode();
|
||||
|
@@ -475,11 +478,14 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
return snapshotsInProgress.entries().size() == 2 && snapshotHasCompletedShard(secondSnapshot, snapshotsInProgress);
|
||||
});
|
||||
|
||||
final ActionFuture<AcknowledgedResponse> firstDeleteFuture = startDeleteFromNonMasterClient(repoName, firstSnapshot);
|
||||
final ActionFuture<AcknowledgedResponse> firstDeleteFuture = startDeleteFromNonClusterManagerClient(repoName, firstSnapshot);
|
||||
awaitNDeletionsInProgress(1);
|
||||
|
||||
blockNodeOnAnyFiles(repoName, dataNode2);
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThreeFuture = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three");
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThreeFuture = startFullSnapshotFromNonClusterManagerClient(
|
||||
repoName,
|
||||
"snapshot-three"
|
||||
);
|
||||
waitForBlock(dataNode2, repoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
assertThat(firstSnapshotResponse.isDone(), is(false));
|
||||
|
@@ -488,7 +494,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
logger.info("--> waiting for all three snapshots to show up as in-progress");
|
||||
assertBusy(() -> assertThat(currentSnapshots(repoName), hasSize(3)), 30L, TimeUnit.SECONDS);
|
||||
|
||||
final ActionFuture<AcknowledgedResponse> deleteAllSnapshots = startDeleteFromNonMasterClient(repoName, "*");
|
||||
final ActionFuture<AcknowledgedResponse> deleteAllSnapshots = startDeleteFromNonClusterManagerClient(repoName, "*");
|
||||
logger.info("--> wait for delete to be enqueued in cluster state");
|
||||
awaitClusterState(state -> {
|
||||
final SnapshotDeletionsInProgress deletionsInProgress = state.custom(SnapshotDeletionsInProgress.TYPE);
|
||||
|
@@ -506,7 +512,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
}
|
||||
}, 30L, TimeUnit.SECONDS);
|
||||
|
||||
logger.info("--> stopping current master node");
|
||||
logger.info("--> stopping current cluster-manager node");
|
||||
internalCluster().stopCurrentMasterNode();
|
||||
|
||||
unblockNode(repoName, dataNode);
|
||||
|
@@ -516,13 +522,14 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
try {
|
||||
assertAcked(deleteFuture.actionGet());
|
||||
} catch (RepositoryException rex) {
|
||||
// rarely the master node fails over twice when shutting down the initial master and fails the transport listener
|
||||
// rarely the cluster-manager node fails over twice
|
||||
// when shutting down the initial cluster-manager and fails the transport listener
|
||||
assertThat(rex.repository(), is("_all"));
|
||||
assertThat(rex.getMessage(), endsWith("Failed to update cluster state during repository operation"));
|
||||
} catch (SnapshotMissingException sme) {
|
||||
// very rarely a master node fail-over happens at such a time that the client on the data-node sees a disconnect exception
|
||||
// after the master has already started the delete, leading to the delete retry to run into a situation where the
|
||||
// snapshot has already been deleted potentially
|
||||
// very rarely a cluster-manager node fail-over happens at such a time
|
||||
// that the client on the data-node sees a disconnect exception after the cluster-manager has already started the delete,
|
||||
// leading to the delete retry to run into a situation where the snapshot has already been deleted potentially
|
||||
assertThat(sme.getSnapshotName(), is(firstSnapshot));
|
||||
}
|
||||
}
|
||||
|
@@ -551,7 +558,10 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
index(testIndex, "_doc", "some_id", "foo", "bar");
|
||||
|
||||
blockDataNode(repoName, dataNode);
|
||||
final ActionFuture<CreateSnapshotResponse> firstSnapshotResponse = startFullSnapshotFromMasterClient(repoName, "snapshot-one");
|
||||
final ActionFuture<CreateSnapshotResponse> firstSnapshotResponse = startFullSnapshotFromClusterManagerClient(
|
||||
repoName,
|
||||
"snapshot-one"
|
||||
);
|
||||
waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
internalCluster().startDataOnlyNode();
|
||||
|
@@ -559,7 +569,10 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
ensureGreen(testIndex);
|
||||
|
||||
final String secondSnapshot = "snapshot-two";
|
||||
final ActionFuture<CreateSnapshotResponse> secondSnapshotResponse = startFullSnapshotFromMasterClient(repoName, secondSnapshot);
|
||||
final ActionFuture<CreateSnapshotResponse> secondSnapshotResponse = startFullSnapshotFromClusterManagerClient(
|
||||
repoName,
|
||||
secondSnapshot
|
||||
);
|
||||
|
||||
// make sure second snapshot is in progress before restarting data node
|
||||
waitUntilInprogress(repoName, secondSnapshot, TimeValue.timeValueSeconds(5L));
|
||||
|
@@ -627,7 +640,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
assertThat(client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(), empty());
|
||||
}
|
||||
|
||||
public void testQueuedOperationsOnMasterRestart() throws Exception {
|
||||
public void testQueuedOperationsOnClusterManagerRestart() throws Exception {
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
|
@@ -648,7 +661,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
awaitNoMoreRunningOperations();
|
||||
}
|
||||
|
||||
public void testQueuedOperationsOnMasterDisconnect() throws Exception {
|
||||
public void testQueuedOperationsOnClusterManagerDisconnect() throws Exception {
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
final String dataNode = internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
|
@@ -656,25 +669,25 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
createIndexWithContent("index-one");
|
||||
createNSnapshots(repoName, randomIntBetween(2, 5));
|
||||
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
final NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
final NetworkDisruption networkDisruption = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
|
||||
internalCluster().setDisruptionScheme(networkDisruption);
|
||||
|
||||
blockNodeOnAnyFiles(repoName, masterNode);
|
||||
ActionFuture<AcknowledgedResponse> firstDeleteFuture = client(masterNode).admin()
|
||||
blockNodeOnAnyFiles(repoName, clusterManagerNode);
|
||||
ActionFuture<AcknowledgedResponse> firstDeleteFuture = client(clusterManagerNode).admin()
|
||||
.cluster()
|
||||
.prepareDeleteSnapshot(repoName, "*")
|
||||
.execute();
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
final ActionFuture<CreateSnapshotResponse> createThirdSnapshot = client(masterNode).admin()
|
||||
final ActionFuture<CreateSnapshotResponse> createThirdSnapshot = client(clusterManagerNode).admin()
|
||||
.cluster()
|
||||
.prepareCreateSnapshot(repoName, "snapshot-three")
|
||||
.setWaitForCompletion(true)
|
||||
.execute();
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
|
||||
final ActionFuture<AcknowledgedResponse> secondDeleteFuture = client(masterNode).admin()
|
||||
final ActionFuture<AcknowledgedResponse> secondDeleteFuture = client(clusterManagerNode).admin()
|
||||
.cluster()
|
||||
.prepareDeleteSnapshot(repoName, "*")
|
||||
.execute();
|
||||
|
@@ -682,7 +695,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
networkDisruption.startDisrupting();
|
||||
ensureStableCluster(3, dataNode);
|
||||
unblockNode(repoName, masterNode);
|
||||
unblockNode(repoName, clusterManagerNode);
|
||||
networkDisruption.stopDisrupting();
|
||||
|
||||
logger.info("--> make sure all failing requests get a response");
|
||||
|
@@ -693,7 +706,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
awaitNoMoreRunningOperations();
|
||||
}
|
||||
|
||||
public void testQueuedOperationsOnMasterDisconnectAndRepoFailure() throws Exception {
|
||||
public void testQueuedOperationsOnClusterManagerDisconnectAndRepoFailure() throws Exception {
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
final String dataNode = internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
|
@@ -701,23 +714,23 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
createIndexWithContent("index-one");
|
||||
createNSnapshots(repoName, randomIntBetween(2, 5));
|
||||
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
final NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
final NetworkDisruption networkDisruption = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
|
||||
internalCluster().setDisruptionScheme(networkDisruption);
|
||||
|
||||
blockMasterFromFinalizingSnapshotOnIndexFile(repoName);
|
||||
final ActionFuture<CreateSnapshotResponse> firstFailedSnapshotFuture = startFullSnapshotFromMasterClient(
|
||||
final ActionFuture<CreateSnapshotResponse> firstFailedSnapshotFuture = startFullSnapshotFromClusterManagerClient(
|
||||
repoName,
|
||||
"failing-snapshot-1"
|
||||
);
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
final ActionFuture<CreateSnapshotResponse> secondFailedSnapshotFuture = startFullSnapshotFromMasterClient(
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
final ActionFuture<CreateSnapshotResponse> secondFailedSnapshotFuture = startFullSnapshotFromClusterManagerClient(
|
||||
repoName,
|
||||
"failing-snapshot-2"
|
||||
);
|
||||
awaitNumberOfSnapshotsInProgress(2);
|
||||
|
||||
final ActionFuture<AcknowledgedResponse> failedDeleteFuture = client(masterNode).admin()
|
||||
final ActionFuture<AcknowledgedResponse> failedDeleteFuture = client(clusterManagerNode).admin()
|
||||
.cluster()
|
||||
.prepareDeleteSnapshot(repoName, "*")
|
||||
.execute();
|
||||
|
@@ -725,7 +738,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
networkDisruption.startDisrupting();
|
||||
ensureStableCluster(3, dataNode);
|
||||
unblockNode(repoName, masterNode);
|
||||
unblockNode(repoName, clusterManagerNode);
|
||||
networkDisruption.stopDisrupting();
|
||||
|
||||
logger.info("--> make sure all failing requests get a response");
|
||||
|
@@ -736,7 +749,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
awaitNoMoreRunningOperations();
|
||||
}
|
||||
|
||||
public void testQueuedOperationsAndBrokenRepoOnMasterFailOver() throws Exception {
|
||||
public void testQueuedOperationsAndBrokenRepoOnClusterManagerFailOver() throws Exception {
|
||||
disableRepoConsistencyCheck("This test corrupts the repository on purpose");
|
||||
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
|
@@ -755,7 +768,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
client().admin().cluster().prepareCreateSnapshot(repoName, "snapshot-three").setWaitForCompletion(false).get();
|
||||
|
||||
final ActionFuture<AcknowledgedResponse> deleteFuture = startDeleteFromNonMasterClient(repoName, "*");
|
||||
final ActionFuture<AcknowledgedResponse> deleteFuture = startDeleteFromNonClusterManagerClient(repoName, "*");
|
||||
awaitNDeletionsInProgress(2);
|
||||
|
||||
internalCluster().stopCurrentMasterNode();
|
||||
|
@@ -765,7 +778,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
expectThrows(RepositoryException.class, deleteFuture::actionGet);
|
||||
}
|
||||
|
||||
public void testQueuedSnapshotOperationsAndBrokenRepoOnMasterFailOver() throws Exception {
|
||||
public void testQueuedSnapshotOperationsAndBrokenRepoOnClusterManagerFailOver() throws Exception {
|
||||
disableRepoConsistencyCheck("This test corrupts the repository on purpose");
|
||||
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
|
@@ -777,14 +790,14 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
createNSnapshots(repoName, randomIntBetween(2, 5));
|
||||
|
||||
final long generation = getRepositoryData(repoName).getGenId();
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
blockNodeOnAnyFiles(repoName, masterNode);
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three");
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
blockNodeOnAnyFiles(repoName, clusterManagerNode);
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromNonClusterManagerClient(repoName, "snapshot-three");
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
corruptIndexN(repoPath, generation);
|
||||
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotFour = startFullSnapshotFromNonMasterClient(repoName, "snapshot-four");
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotFour = startFullSnapshotFromNonClusterManagerClient(repoName, "snapshot-four");
|
||||
internalCluster().stopCurrentMasterNode();
|
||||
ensureStableCluster(3);
|
||||
|
||||
|
@@ -793,7 +806,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
expectThrows(OpenSearchException.class, snapshotFour::actionGet);
|
||||
}
|
||||
|
||||
public void testQueuedSnapshotOperationsAndBrokenRepoOnMasterFailOver2() throws Exception {
|
||||
public void testQueuedSnapshotOperationsAndBrokenRepoOnClusterManagerFailOver2() throws Exception {
|
||||
disableRepoConsistencyCheck("This test corrupts the repository on purpose");
|
||||
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
|
@@ -805,28 +818,28 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
createNSnapshots(repoName, randomIntBetween(2, 5));
|
||||
|
||||
final long generation = getRepositoryData(repoName).getGenId();
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
blockMasterFromFinalizingSnapshotOnIndexFile(repoName);
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three");
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromNonClusterManagerClient(repoName, "snapshot-three");
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
corruptIndexN(repoPath, generation);
|
||||
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotFour = startFullSnapshotFromNonMasterClient(repoName, "snapshot-four");
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotFour = startFullSnapshotFromNonClusterManagerClient(repoName, "snapshot-four");
|
||||
awaitNumberOfSnapshotsInProgress(2);
|
||||
|
||||
final NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
|
||||
final NetworkDisruption networkDisruption = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
|
||||
internalCluster().setDisruptionScheme(networkDisruption);
|
||||
networkDisruption.startDisrupting();
|
||||
ensureStableCluster(3, dataNode);
|
||||
unblockNode(repoName, masterNode);
|
||||
unblockNode(repoName, clusterManagerNode);
|
||||
networkDisruption.stopDisrupting();
|
||||
awaitNoMoreRunningOperations();
|
||||
expectThrows(OpenSearchException.class, snapshotThree::actionGet);
|
||||
expectThrows(OpenSearchException.class, snapshotFour::actionGet);
|
||||
}
|
||||
|
||||
public void testQueuedSnapshotOperationsAndBrokenRepoOnMasterFailOverMultipleRepos() throws Exception {
|
||||
public void testQueuedSnapshotOperationsAndBrokenRepoOnClusterManagerFailOverMultipleRepos() throws Exception {
|
||||
disableRepoConsistencyCheck("This test corrupts the repository on purpose");
|
||||
|
||||
internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
|
@@ -837,30 +850,30 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
createIndexWithContent("index-one");
|
||||
createNSnapshots(repoName, randomIntBetween(2, 5));
|
||||
|
||||
final String masterNode = internalCluster().getMasterName();
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
|
||||
final String blockedRepoName = "repo-blocked";
|
||||
createRepository(blockedRepoName, "mock");
|
||||
createNSnapshots(blockedRepoName, randomIntBetween(1, 5));
|
||||
blockNodeOnAnyFiles(blockedRepoName, masterNode);
|
||||
final ActionFuture<AcknowledgedResponse> deleteFuture = startDeleteFromNonMasterClient(blockedRepoName, "*");
|
||||
waitForBlock(masterNode, blockedRepoName, TimeValue.timeValueSeconds(30L));
|
||||
blockNodeOnAnyFiles(blockedRepoName, clusterManagerNode);
|
||||
final ActionFuture<AcknowledgedResponse> deleteFuture = startDeleteFromNonClusterManagerClient(blockedRepoName, "*");
|
||||
waitForBlock(clusterManagerNode, blockedRepoName, TimeValue.timeValueSeconds(30L));
|
||||
awaitNDeletionsInProgress(1);
|
||||
final ActionFuture<CreateSnapshotResponse> createBlockedSnapshot = startFullSnapshotFromNonMasterClient(
|
||||
final ActionFuture<CreateSnapshotResponse> createBlockedSnapshot = startFullSnapshotFromNonClusterManagerClient(
|
||||
blockedRepoName,
|
||||
"queued-snapshot"
|
||||
);
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
|
||||
final long generation = getRepositoryData(repoName).getGenId();
|
||||
blockNodeOnAnyFiles(repoName, masterNode);
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three");
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
blockNodeOnAnyFiles(repoName, clusterManagerNode);
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromNonClusterManagerClient(repoName, "snapshot-three");
|
||||
waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
awaitNumberOfSnapshotsInProgress(2);
|
||||
|
||||
corruptIndexN(repoPath, generation);
|
||||
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotFour = startFullSnapshotFromNonMasterClient(repoName, "snapshot-four");
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotFour = startFullSnapshotFromNonClusterManagerClient(repoName, "snapshot-four");
|
||||
awaitNumberOfSnapshotsInProgress(3);
|
||||
internalCluster().stopCurrentMasterNode();
|
||||
ensureStableCluster(3);
|
||||
|
@@ -872,8 +885,8 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
try {
|
||||
createBlockedSnapshot.actionGet();
|
||||
} catch (OpenSearchException ex) {
|
||||
// Ignored, thrown most of the time but due to retries when shutting down the master could randomly pass when the request is
|
||||
// retried and gets executed after the above delete
|
||||
// Ignored, thrown most of the time but due to retries when shutting down the cluster-manager could randomly pass
|
||||
// when the request is retried and gets executed after the above delete
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1011,13 +1024,13 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startAndBlockFailingFullSnapshot(repoName, "snap-other");
|
||||
|
||||
final String masterName = internalCluster().getMasterName();
|
||||
final String clusterManagerName = internalCluster().getMasterName();
|
||||
|
||||
final String snapshotOne = snapshotNames.get(0);
|
||||
final ActionFuture<AcknowledgedResponse> deleteSnapshotOne = startDeleteSnapshot(repoName, snapshotOne);
|
||||
awaitNDeletionsInProgress(1);
|
||||
|
||||
unblockNode(repoName, masterName);
|
||||
unblockNode(repoName, clusterManagerName);
|
||||
|
||||
expectThrows(SnapshotException.class, snapshotThree::actionGet);
|
||||
assertAcked(deleteSnapshotOne.get());
|
||||
|
@@ -1067,20 +1080,20 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testMasterFailoverOnFinalizationLoop() throws Exception {
|
||||
public void testClusterManagerFailoverOnFinalizationLoop() throws Exception {
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
final String dataNode = internalCluster().startDataOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
createRepository(repoName, "mock");
|
||||
createIndexWithContent("index-test");
|
||||
final NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
|
||||
final NetworkDisruption networkDisruption = isolateClusterManagerDisruption(NetworkDisruption.DISCONNECT);
|
||||
internalCluster().setDisruptionScheme(networkDisruption);
|
||||
|
||||
final List<String> snapshotNames = createNSnapshots(repoName, randomIntBetween(2, 5));
|
||||
final String masterName = internalCluster().getMasterName();
|
||||
final String clusterManagerName = internalCluster().getMasterName();
|
||||
blockMasterFromDeletingIndexNFile(repoName);
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromMasterClient(repoName, "snap-other");
|
||||
waitForBlock(masterName, repoName, TimeValue.timeValueSeconds(30L));
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromClusterManagerClient(repoName, "snap-other");
|
||||
waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
final String snapshotOne = snapshotNames.get(0);
|
||||
final ActionFuture<AcknowledgedResponse> deleteSnapshotOne = startDeleteSnapshot(repoName, snapshotOne);
|
||||
|
@@ -1088,7 +1101,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
networkDisruption.startDisrupting();
|
||||
ensureStableCluster(3, dataNode);
|
||||
|
||||
unblockNode(repoName, masterName);
|
||||
unblockNode(repoName, clusterManagerName);
|
||||
networkDisruption.stopDisrupting();
|
||||
ensureStableCluster(4);
|
||||
|
||||
|
@@ -1180,7 +1193,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
assertSuccessful(createSlowFuture3);
|
||||
}
|
||||
|
||||
public void testMasterFailoverAndMultipleQueuedUpSnapshotsAcrossTwoRepos() throws Exception {
|
||||
public void testClusterManagerFailoverAndMultipleQueuedUpSnapshotsAcrossTwoRepos() throws Exception {
|
||||
disableRepoConsistencyCheck("This test corrupts the repository on purpose");
|
||||
|
||||
internalCluster().startMasterOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
|
@@ -1206,9 +1219,9 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
client().admin().cluster().prepareCreateSnapshot(otherRepoName, "snapshot-other-blocked-2").setWaitForCompletion(false).get();
|
||||
|
||||
awaitNumberOfSnapshotsInProgress(4);
|
||||
final String initialMaster = internalCluster().getMasterName();
|
||||
waitForBlock(initialMaster, repoName, TimeValue.timeValueSeconds(30L));
|
||||
waitForBlock(initialMaster, otherRepoName, TimeValue.timeValueSeconds(30L));
|
||||
final String initialClusterManager = internalCluster().getMasterName();
|
||||
waitForBlock(initialClusterManager, repoName, TimeValue.timeValueSeconds(30L));
|
||||
waitForBlock(initialClusterManager, otherRepoName, TimeValue.timeValueSeconds(30L));
|
||||
|
||||
internalCluster().stopCurrentMasterNode();
|
||||
ensureStableCluster(3, dataNode);
|
||||
|
@@ -1384,7 +1397,7 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
unblockNode(repoName, clusterManagerName);
|
||||
awaitNumberOfSnapshotsInProgress(1);
|
||||
|
||||
awaitMasterFinishRepoOperations();
|
||||
awaitClusterManagerFinishRepoOperations();
|
||||
|
||||
unblockNode(repoName, dataNode);
|
||||
assertSuccessful(blockedSnapshot);
|
||||
|
@@ -1416,13 +1429,13 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
return snapshotNames;
|
||||
}
|
||||
|
||||
private ActionFuture<AcknowledgedResponse> startDeleteFromNonMasterClient(String repoName, String snapshotName) {
|
||||
logger.info("--> deleting snapshot [{}] from repo [{}] from non master client", snapshotName, repoName);
|
||||
private ActionFuture<AcknowledgedResponse> startDeleteFromNonClusterManagerClient(String repoName, String snapshotName) {
|
||||
logger.info("--> deleting snapshot [{}] from repo [{}] from non cluster-manager client", snapshotName, repoName);
|
||||
return internalCluster().nonMasterClient().admin().cluster().prepareDeleteSnapshot(repoName, snapshotName).execute();
|
||||
}
|
||||
|
||||
private ActionFuture<CreateSnapshotResponse> startFullSnapshotFromNonMasterClient(String repoName, String snapshotName) {
|
||||
logger.info("--> creating full snapshot [{}] to repo [{}] from non master client", snapshotName, repoName);
|
||||
private ActionFuture<CreateSnapshotResponse> startFullSnapshotFromNonClusterManagerClient(String repoName, String snapshotName) {
|
||||
logger.info("--> creating full snapshot [{}] to repo [{}] from non cluster-manager client", snapshotName, repoName);
|
||||
return internalCluster().nonMasterClient()
|
||||
.admin()
|
||||
.cluster()
|
||||
|
@@ -1431,8 +1444,8 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
.execute();
|
||||
}
|
||||
|
||||
private ActionFuture<CreateSnapshotResponse> startFullSnapshotFromMasterClient(String repoName, String snapshotName) {
|
||||
logger.info("--> creating full snapshot [{}] to repo [{}] from master client", snapshotName, repoName);
|
||||
private ActionFuture<CreateSnapshotResponse> startFullSnapshotFromClusterManagerClient(String repoName, String snapshotName) {
|
||||
logger.info("--> creating full snapshot [{}] to repo [{}] from cluster-manager client", snapshotName, repoName);
|
||||
return internalCluster().masterClient()
|
||||
.admin()
|
||||
.cluster()
|
||||
|
@@ -1488,10 +1501,10 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
private ActionFuture<AcknowledgedResponse> startAndBlockOnDeleteSnapshot(String repoName, String snapshotName)
|
||||
throws InterruptedException {
|
||||
final String masterName = internalCluster().getMasterName();
|
||||
blockNodeOnAnyFiles(repoName, masterName);
|
||||
final String clusterManagerName = internalCluster().getMasterName();
|
||||
blockNodeOnAnyFiles(repoName, clusterManagerName);
|
||||
final ActionFuture<AcknowledgedResponse> fut = startDeleteSnapshot(repoName, snapshotName);
|
||||
waitForBlock(masterName, repoName, TimeValue.timeValueSeconds(30L));
|
||||
waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L));
|
||||
return fut;
|
||||
}
|
||||
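As a usage reference for the renamed client helpers above, the lines below condense the fail-over flow exercised earlier in this file (testClusterManagerFailOverWithQueuedDeletes); they are a sketch only, not part of the commit, and the repository and snapshot names are placeholders.

    // illustrative only: queue operations through a non-cluster-manager client, then fail over
    blockDataNode(repoName, dataNode);
    final ActionFuture<CreateSnapshotResponse> snapshotFuture =
        startFullSnapshotFromNonClusterManagerClient(repoName, "snapshot-one"); // request sent via a non-cluster-manager client
    waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L));
    final ActionFuture<AcknowledgedResponse> deleteFuture =
        startDeleteFromNonClusterManagerClient(repoName, "snapshot-one");       // delete queued behind the blocked snapshot
    awaitNDeletionsInProgress(1);
    internalCluster().stopCurrentMasterNode();                                   // fail over the elected cluster-manager
    unblockNode(repoName, dataNode);
    awaitNoMoreRunningOperations();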
|
||||
|
|
|
@@ -835,7 +835,7 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
|
|||
}
|
||||
}
|
||||
|
||||
public void testMasterShutdownDuringSnapshot() throws Exception {
|
||||
public void testClusterManagerShutdownDuringSnapshot() throws Exception {
|
||||
logger.info("--> starting two cluster-manager nodes and two data nodes");
|
||||
internalCluster().startMasterOnlyNodes(2);
|
||||
internalCluster().startDataOnlyNodes(2);
|
||||
|
@@ -873,7 +873,7 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
|
|||
assertEquals(0, snapshotInfo.failedShards());
|
||||
}
|
||||
|
||||
public void testMasterAndDataShutdownDuringSnapshot() throws Exception {
|
||||
public void testClusterManagerAndDataShutdownDuringSnapshot() throws Exception {
|
||||
logger.info("--> starting three cluster-manager nodes and two data nodes");
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
internalCluster().startDataOnlyNodes(2);
|
||||
|
@@ -890,7 +890,7 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
|
|||
final int numberOfShards = getNumShards("test-idx").numPrimaries;
|
||||
logger.info("number of shards: {}", numberOfShards);
|
||||
|
||||
final String masterNode = blockMasterFromFinalizingSnapshotOnSnapFile("test-repo");
|
||||
final String clusterManagerNode = blockMasterFromFinalizingSnapshotOnSnapFile("test-repo");
|
||||
final String dataNode = blockNodeWithIndex("test-repo", "test-idx");
|
||||
|
||||
dataNodeClient().admin()
|
||||
|
@@ -902,7 +902,7 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
|
|||
|
||||
logger.info("--> stopping data node {}", dataNode);
|
||||
stopNode(dataNode);
|
||||
logger.info("--> stopping cluster-manager node {} ", masterNode);
|
||||
logger.info("--> stopping cluster-manager node {} ", clusterManagerNode);
|
||||
internalCluster().stopCurrentMasterNode();
|
||||
|
||||
logger.info("--> wait until the snapshot is done");
|
||||
|
@@ -1143,7 +1143,7 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
|
|||
assertThat(snapshot3IndexMetaFiles, hasSize(1)); // should have deleted the metadata blob referenced by the first two snapshots
|
||||
}
|
||||
|
||||
public void testDataNodeRestartWithBusyMasterDuringSnapshot() throws Exception {
|
||||
public void testDataNodeRestartWithBusyClusterManagerDuringSnapshot() throws Exception {
|
||||
logger.info("--> starting a cluster-manager node and two data nodes");
|
||||
internalCluster().startClusterManagerOnlyNode();
|
||||
internalCluster().startDataOnlyNodes(2);
|
||||
|
|
|
@@ -198,8 +198,8 @@ public class MetadataLoadingDuringSnapshotRestoreIT extends AbstractSnapshotInte
|
|||
}
|
||||
|
||||
private CountingMockRepository getCountingMockRepository() {
|
||||
String master = internalCluster().getMasterName();
|
||||
RepositoriesService repositoriesService = internalCluster().getInstance(RepositoriesService.class, master);
|
||||
String clusterManager = internalCluster().getMasterName();
|
||||
RepositoriesService repositoriesService = internalCluster().getInstance(RepositoriesService.class, clusterManager);
|
||||
Repository repository = repositoriesService.repository("repository");
|
||||
assertThat(repository, instanceOf(CountingMockRepository.class));
|
||||
return (CountingMockRepository) repository;
|
||||
|
|
|
@@ -177,15 +177,15 @@ public class RepositoriesIT extends AbstractSnapshotIntegTestCase {
|
|||
|
||||
// Make repository to throw exception when trying to delete stale indices
|
||||
// This will make sure stale indices stays in repository after snapshot delete
|
||||
String masterNode = internalCluster().getMasterName();
|
||||
((MockRepository) internalCluster().getInstance(RepositoriesService.class, masterNode).repository("test-repo"))
|
||||
String clusterManagerNode = internalCluster().getMasterName();
|
||||
((MockRepository) internalCluster().getInstance(RepositoriesService.class, clusterManagerNode).repository("test-repo"))
|
||||
.setThrowExceptionWhileDelete(true);
|
||||
|
||||
logger.info("--> delete the bulk of the snapshots");
|
||||
client.admin().cluster().prepareDeleteSnapshot(repositoryName, bulkSnapshotsPattern).get();
|
||||
|
||||
// Make repository to work normally
|
||||
((MockRepository) internalCluster().getInstance(RepositoriesService.class, masterNode).repository("test-repo"))
|
||||
((MockRepository) internalCluster().getInstance(RepositoriesService.class, clusterManagerNode).repository("test-repo"))
|
||||
.setThrowExceptionWhileDelete(false);
|
||||
|
||||
// This snapshot should delete last snapshot's residual stale indices as well
|
||||
|
|
|
@@ -70,7 +70,7 @@ public class RepositoryFilterUserMetadataIT extends OpenSearchIntegTestCase {
|
|||
}
|
||||
|
||||
public void testFilteredRepoMetadataIsUsed() {
|
||||
final String masterName = internalCluster().getMasterName();
|
||||
final String clusterManagerName = internalCluster().getMasterName();
|
||||
final String repoName = "test-repo";
|
||||
assertAcked(
|
||||
client().admin()
|
||||
|
@@ -78,7 +78,9 @@ public class RepositoryFilterUserMetadataIT extends OpenSearchIntegTestCase {
|
|||
.preparePutRepository(repoName)
|
||||
.setType(MetadataFilteringPlugin.TYPE)
|
||||
.setSettings(
|
||||
Settings.builder().put("location", randomRepoPath()).put(MetadataFilteringPlugin.MASTER_SETTING_VALUE, masterName)
|
||||
Settings.builder()
|
||||
.put("location", randomRepoPath())
|
||||
.put(MetadataFilteringPlugin.CLUSTER_MANAGER_SETTING_VALUE, clusterManagerName)
|
||||
)
|
||||
);
|
||||
createIndex("test-idx");
|
||||
|
@@ -88,15 +90,18 @@ public class RepositoryFilterUserMetadataIT extends OpenSearchIntegTestCase {
|
|||
.setWaitForCompletion(true)
|
||||
.get()
|
||||
.getSnapshotInfo();
|
||||
assertThat(snapshotInfo.userMetadata(), is(Collections.singletonMap(MetadataFilteringPlugin.MOCK_FILTERED_META, masterName)));
|
||||
assertThat(
|
||||
snapshotInfo.userMetadata(),
|
||||
is(Collections.singletonMap(MetadataFilteringPlugin.MOCK_FILTERED_META, clusterManagerName))
|
||||
);
|
||||
}
|
||||
|
||||
// Mock plugin that stores the name of the master node that started a snapshot in each snapshot's metadata
|
||||
// Mock plugin that stores the name of the cluster-manager node that started a snapshot in each snapshot's metadata
|
||||
public static final class MetadataFilteringPlugin extends org.opensearch.plugins.Plugin implements RepositoryPlugin {
|
||||
|
||||
private static final String MOCK_FILTERED_META = "mock_filtered_meta";
|
||||
|
||||
private static final String MASTER_SETTING_VALUE = "initial_master";
|
||||
private static final String CLUSTER_MANAGER_SETTING_VALUE = "initial_cluster_manager";
|
||||
|
||||
private static final String TYPE = "mock_meta_filtering";
|
||||
|
||||
|
@@ -112,8 +117,8 @@ public class RepositoryFilterUserMetadataIT extends OpenSearchIntegTestCase {
|
|||
metadata -> new FsRepository(metadata, env, namedXContentRegistry, clusterService, recoverySettings) {
|
||||
|
||||
// Storing the initially expected metadata value here to verify that #filterUserMetadata is only called once on the
|
||||
// initial master node starting the snapshot
|
||||
private final String initialMetaValue = metadata.settings().get(MASTER_SETTING_VALUE);
|
||||
// initial cluster-manager node starting the snapshot
|
||||
private final String initialMetaValue = metadata.settings().get(CLUSTER_MANAGER_SETTING_VALUE);
|
||||
|
||||
@Override
|
||||
public void finalizeSnapshot(
|
||||
|
|
|
@@ -83,7 +83,7 @@ public class SnapshotShardsServiceIT extends AbstractSnapshotIntegTestCase {
|
|||
final SnapshotId snapshotId = getSnapshot("test-repo", "test-snap").snapshotId();
|
||||
|
||||
logger.info("--> start disrupting cluster");
|
||||
final NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.NetworkDelay.random(random()));
|
||||
final NetworkDisruption networkDisruption = isolateClusterManagerDisruption(NetworkDisruption.NetworkDelay.random(random()));
|
||||
internalCluster().setDisruptionScheme(networkDisruption);
|
||||
networkDisruption.startDisrupting();
|
||||
|
||||
|
|
|
@@ -237,7 +237,7 @@ public class SnapshotStatusApisIT extends AbstractSnapshotIntegTestCase {
|
|||
blockDataNode(repoName, dataNodeOne);
|
||||
|
||||
final String snapshotOne = "snap-1";
|
||||
// restarting a data node below so using a master client here
|
||||
// restarting a data node below so using a cluster-manager client here
|
||||
final ActionFuture<CreateSnapshotResponse> responseSnapshotOne = internalCluster().masterClient()
|
||||
.admin()
|
||||
.cluster()
|
||||
|
|
|
@@ -77,9 +77,9 @@ public class ConcurrentDocumentOperationIT extends OpenSearchIntegTestCase {
|
|||
client().admin().indices().prepareRefresh().execute().actionGet();
|
||||
|
||||
logger.info("done indexing, check all have the same field value");
|
||||
Map masterSource = client().prepareGet("test", "1").execute().actionGet().getSourceAsMap();
|
||||
Map clusterManagerSource = client().prepareGet("test", "1").execute().actionGet().getSourceAsMap();
|
||||
for (int i = 0; i < (cluster().size() * 5); i++) {
|
||||
assertThat(client().prepareGet("test", "1").execute().actionGet().getSourceAsMap(), equalTo(masterSource));
|
||||
assertThat(client().prepareGet("test", "1").execute().actionGet().getSourceAsMap(), equalTo(clusterManagerSource));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -627,7 +627,7 @@ public abstract class AbstractSnapshotIntegTestCase extends OpenSearchIntegTestC
|
|||
return snapshotInfos.get(0);
|
||||
}
|
||||
|
||||
protected void awaitMasterFinishRepoOperations() throws Exception {
|
||||
protected void awaitClusterManagerFinishRepoOperations() throws Exception {
|
||||
logger.info("--> waiting for cluster-manager to finish all repo operations on its SNAPSHOT pool");
|
||||
final ThreadPool clusterManagerThreadPool = internalCluster().getMasterNodeInstance(ThreadPool.class);
|
||||
assertBusy(() -> {
|
||||
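The hunk above is truncated at the start of the assertBusy block; a plausible completion is sketched here for readability. The exact thread-pool check is an assumption inferred from the method's log message, not text shown in this diff.

    // assumed body: wait until the cluster-manager's SNAPSHOT pool has no active tasks
    assertBusy(() -> {
        for (ThreadPoolStats.Stats stat : clusterManagerThreadPool.stats()) {
            if (ThreadPool.Names.SNAPSHOT.equals(stat.getName())) {
                assertEquals(0, stat.getActive());
                break;
            }
        }
    });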
|
|
|
@@ -712,7 +712,7 @@ public abstract class OpenSearchIntegTestCase extends OpenSearchTestCase {
|
|||
* @param disruptionType type of disruption to create
|
||||
* @return disruption
|
||||
*/
|
||||
protected static NetworkDisruption isolateMasterDisruption(NetworkDisruption.NetworkLinkDisruptionType disruptionType) {
|
||||
protected static NetworkDisruption isolateClusterManagerDisruption(NetworkDisruption.NetworkLinkDisruptionType disruptionType) {
|
||||
final String clusterManagerNode = internalCluster().getMasterName();
|
||||
return new NetworkDisruption(
|
||||
new NetworkDisruption.TwoPartitions(
|
||||
|
|