Only ack cluster state updates successfully applied on all nodes ()

The cluster state acking mechanism currently incorrectly acks cluster state updates that have not
successfully been applied on all nodes. In a situation, for example, where some of the nodes
disconnect during publishing, and don't acknowledge receiving the new cluster state, the user-facing
action (e.g. create index request) will still consider this as an ack.
This commit is contained in:
Yannick Welsch 2018-05-23 08:56:32 +02:00 committed by GitHub
parent 886db84ad2
commit cceaa9a0f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 55 additions and 13 deletions
server/src

@ -25,7 +25,11 @@ import org.elasticsearch.common.unit.TimeValue;
public interface AckedClusterStateTaskListener extends ClusterStateTaskListener { public interface AckedClusterStateTaskListener extends ClusterStateTaskListener {
/** /**
* Called to determine which nodes the acknowledgement is expected from * Called to determine which nodes the acknowledgement is expected from.
*
* As this method will be called multiple times to determine the set of acking nodes,
* it is crucial for it to return consistent results: Given the same listener instance
* and the same node parameter, the method implementation should return the same result.
* *
* @param discoveryNode a node * @param discoveryNode a node
* @return true if the node is expected to send ack back, false otherwise * @return true if the node is expected to send ack back, false otherwise

@ -61,7 +61,7 @@ public abstract class AckedClusterStateUpdateTask<Response> extends ClusterState
* @param e optional error that might have been thrown * @param e optional error that might have been thrown
*/ */
public void onAllNodesAcked(@Nullable Exception e) { public void onAllNodesAcked(@Nullable Exception e) {
listener.onResponse(newResponse(true)); listener.onResponse(newResponse(e == null));
} }
protected abstract Response newResponse(boolean acknowledged); protected abstract Response newResponse(boolean acknowledged);

@ -363,7 +363,7 @@ public class MetaDataMappingService extends AbstractComponent {
@Override @Override
public void onAllNodesAcked(@Nullable Exception e) { public void onAllNodesAcked(@Nullable Exception e) {
listener.onResponse(new ClusterStateUpdateResponse(true)); listener.onResponse(new ClusterStateUpdateResponse(e == null));
} }
@Override @Override

@ -563,7 +563,7 @@ public class MasterService extends AbstractLifecycleComponent {
private final AckedClusterStateTaskListener ackedTaskListener; private final AckedClusterStateTaskListener ackedTaskListener;
private final CountDown countDown; private final CountDown countDown;
private final DiscoveryNodes nodes; private final DiscoveryNode masterNode;
private final long clusterStateVersion; private final long clusterStateVersion;
private final Future<?> ackTimeoutCallback; private final Future<?> ackTimeoutCallback;
private Exception lastFailure; private Exception lastFailure;
@ -572,15 +572,14 @@ public class MasterService extends AbstractLifecycleComponent {
ThreadPool threadPool) { ThreadPool threadPool) {
this.ackedTaskListener = ackedTaskListener; this.ackedTaskListener = ackedTaskListener;
this.clusterStateVersion = clusterStateVersion; this.clusterStateVersion = clusterStateVersion;
this.nodes = nodes; this.masterNode = nodes.getMasterNode();
int countDown = 0; int countDown = 0;
for (DiscoveryNode node : nodes) { for (DiscoveryNode node : nodes) {
if (ackedTaskListener.mustAck(node)) { //we always wait for at least the master node
if (node.equals(masterNode) || ackedTaskListener.mustAck(node)) {
countDown++; countDown++;
} }
} }
//we always wait for at least 1 node (the master)
countDown = Math.max(1, countDown);
logger.trace("expecting {} acknowledgements for cluster_state update (version: {})", countDown, clusterStateVersion); logger.trace("expecting {} acknowledgements for cluster_state update (version: {})", countDown, clusterStateVersion);
this.countDown = new CountDown(countDown); this.countDown = new CountDown(countDown);
this.ackTimeoutCallback = threadPool.schedule(ackedTaskListener.ackTimeout(), ThreadPool.Names.GENERIC, () -> onTimeout()); this.ackTimeoutCallback = threadPool.schedule(ackedTaskListener.ackTimeout(), ThreadPool.Names.GENERIC, () -> onTimeout());
@ -588,11 +587,8 @@ public class MasterService extends AbstractLifecycleComponent {
@Override @Override
public void onNodeAck(DiscoveryNode node, @Nullable Exception e) { public void onNodeAck(DiscoveryNode node, @Nullable Exception e) {
if (!ackedTaskListener.mustAck(node)) { if (node.equals(masterNode) == false && ackedTaskListener.mustAck(node) == false) {
//we always wait for the master ack anyway return;
if (!node.equals(nodes.getMasterNode())) {
return;
}
} }
if (e == null) { if (e == null) {
logger.trace("ack received from node [{}], cluster_state update (version: {})", node, clusterStateVersion); logger.trace("ack received from node [{}], cluster_state update (version: {})", node, clusterStateVersion);

@ -23,6 +23,7 @@ import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse; import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse; import org.elasticsearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
import org.elasticsearch.action.admin.indices.close.CloseIndexResponse; import org.elasticsearch.action.admin.indices.close.CloseIndexResponse;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.open.OpenIndexResponse; import org.elasticsearch.action.admin.indices.open.OpenIndexResponse;
import org.elasticsearch.client.Client; import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterState;
@ -33,8 +34,16 @@ import org.elasticsearch.cluster.routing.allocation.decider.ConcurrentRebalanceA
import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.discovery.DiscoverySettings; import org.elasticsearch.discovery.DiscoverySettings;
import org.elasticsearch.discovery.zen.PublishClusterStateAction;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.TransportService;
import java.util.Arrays;
import java.util.Collection;
import java.util.stream.Stream;
import static org.elasticsearch.test.ESIntegTestCase.Scope.TEST; import static org.elasticsearch.test.ESIntegTestCase.Scope.TEST;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
@ -43,6 +52,11 @@ import static org.hamcrest.Matchers.equalTo;
@ClusterScope(scope = TEST, minNumDataNodes = 2) @ClusterScope(scope = TEST, minNumDataNodes = 2)
public class AckClusterUpdateSettingsIT extends ESIntegTestCase { public class AckClusterUpdateSettingsIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(MockTransportService.TestPlugin.class);
}
@Override @Override
protected Settings nodeSettings(int nodeOrdinal) { protected Settings nodeSettings(int nodeOrdinal) {
return Settings.builder() return Settings.builder()
@ -156,4 +170,32 @@ public class AckClusterUpdateSettingsIT extends ESIntegTestCase {
assertThat(openIndexResponse.isAcknowledged(), equalTo(false)); assertThat(openIndexResponse.isAcknowledged(), equalTo(false));
ensureGreen("test"); // make sure that recovery from disk has completed, so that check index doesn't fail. ensureGreen("test"); // make sure that recovery from disk has completed, so that check index doesn't fail.
} }
public void testAckingFailsIfNotPublishedToAllNodes() {
String masterNode = internalCluster().getMasterName();
String nonMasterNode = Stream.of(internalCluster().getNodeNames())
.filter(node -> node.equals(masterNode) == false).findFirst().get();
MockTransportService masterTransportService =
(MockTransportService) internalCluster().getInstance(TransportService.class, masterNode);
MockTransportService nonMasterTransportService =
(MockTransportService) internalCluster().getInstance(TransportService.class, nonMasterNode);
logger.info("blocking cluster state publishing from master [{}] to non master [{}]", masterNode, nonMasterNode);
if (randomBoolean() && internalCluster().numMasterNodes() != 2) {
masterTransportService.addFailToSendNoConnectRule(nonMasterTransportService, PublishClusterStateAction.SEND_ACTION_NAME);
} else {
masterTransportService.addFailToSendNoConnectRule(nonMasterTransportService, PublishClusterStateAction.COMMIT_ACTION_NAME);
}
CreateIndexResponse response = client().admin().indices().prepareCreate("test").get();
assertFalse(response.isAcknowledged());
logger.info("waiting for cluster to reform");
masterTransportService.clearRule(nonMasterTransportService);
ensureStableCluster(internalCluster().size());
assertAcked(client().admin().indices().prepareDelete("test"));
}
} }