tighten up FailedToCommitClusterStateException semantics and other feedback
This commit is contained in:
parent
98ed133dd7
commit
c9ee8dbd16
|
@ -26,6 +26,7 @@ import org.elasticsearch.common.io.stream.StreamOutput;
|
|||
import org.elasticsearch.common.logging.support.LoggerMessageFormat;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.discovery.Discovery;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.IndexNotFoundException;
|
||||
import org.elasticsearch.index.shard.ShardId;
|
||||
|
@ -597,7 +598,7 @@ public class ElasticsearchException extends RuntimeException implements ToXConte
|
|||
IndexNotFoundException.class,
|
||||
ShardNotFoundException.class,
|
||||
NotSerializableExceptionWrapper.class,
|
||||
org.elasticsearch.discovery.zen.publish.PublishClusterStateAction.FailedToCommitException.class
|
||||
Discovery.FailedToCommitClusterStateException.class
|
||||
};
|
||||
Map<String, Constructor<? extends ElasticsearchException>> mapping = new HashMap<>(exceptions.length);
|
||||
for (Class<? extends ElasticsearchException> e : exceptions) {
|
||||
|
|
|
@ -40,7 +40,6 @@ import org.elasticsearch.common.logging.ESLogger;
|
|||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.text.StringText;
|
||||
import org.elasticsearch.common.transport.BoundTransportAddress;
|
||||
import org.elasticsearch.common.transport.TransportAddress;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
import org.elasticsearch.common.util.concurrent.*;
|
||||
|
@ -485,8 +484,8 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
|
|||
logger.debug("publishing cluster state version [{}]", newClusterState.version());
|
||||
try {
|
||||
discoveryService.publish(clusterChangedEvent, ackListener);
|
||||
} catch (Throwable t) {
|
||||
logger.warn("failing [{}]: failed to publish cluster state version [{}]", t, source, newClusterState.version());
|
||||
} catch (Discovery.FailedToCommitClusterStateException t) {
|
||||
logger.warn("failing [{}]: failed to commit cluster state version [{}]", t, source, newClusterState.version());
|
||||
updateTask.onFailure(source, t);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -19,15 +19,17 @@
|
|||
|
||||
package org.elasticsearch.discovery;
|
||||
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.cluster.ClusterChangedEvent;
|
||||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||
import org.elasticsearch.cluster.routing.RoutingService;
|
||||
import org.elasticsearch.cluster.routing.allocation.AllocationService;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.component.LifecycleComponent;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.node.service.NodeService;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A pluggable module allowing to implement discovery of other nodes, publishing of the cluster
|
||||
* state to all nodes, electing a master of the cluster that raises cluster state change
|
||||
|
@ -60,11 +62,29 @@ public interface Discovery extends LifecycleComponent<Discovery> {
|
|||
*
|
||||
* The {@link AckListener} allows to keep track of the ack received from nodes, and verify whether
|
||||
* they updated their own cluster state or not.
|
||||
*
|
||||
* The method is guaranteed to throw a {@link FailedToCommitClusterStateException} if the change is not committed and should be rejected.
|
||||
* Any other exception signals the something wrong happened but the change is committed.
|
||||
*/
|
||||
void publish(ClusterChangedEvent clusterChangedEvent, AckListener ackListener);
|
||||
|
||||
public static interface AckListener {
|
||||
interface AckListener {
|
||||
void onNodeAck(DiscoveryNode node, @Nullable Throwable t);
|
||||
void onTimeout();
|
||||
}
|
||||
|
||||
class FailedToCommitClusterStateException extends ElasticsearchException {
|
||||
|
||||
public FailedToCommitClusterStateException(StreamInput in) throws IOException {
|
||||
super(in);
|
||||
}
|
||||
|
||||
public FailedToCommitClusterStateException(String msg, Object... args) {
|
||||
super(msg, args);
|
||||
}
|
||||
|
||||
public FailedToCommitClusterStateException(String msg, Throwable cause, Object... args) {
|
||||
super(msg, cause, args);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -331,7 +331,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
|
|||
nodesFD.updateNodesAndPing(clusterChangedEvent.state());
|
||||
try {
|
||||
publishClusterState.publish(clusterChangedEvent, electMaster.minimumMasterNodes(), ackListener);
|
||||
} catch (PublishClusterStateAction.FailedToCommitException t) {
|
||||
} catch (FailedToCommitClusterStateException t) {
|
||||
// cluster service logs a WARN message
|
||||
logger.debug("failed to publish cluster state version [{}] (not enough nodes acknowledged, min master nodes [{}])", clusterChangedEvent.state().version(), electMaster.minimumMasterNodes());
|
||||
clusterService.submitStateUpdateTask("zen-disco-failed-to-publish", Priority.IMMEDIATE, new ClusterStateUpdateTask() {
|
||||
|
|
|
@ -93,39 +93,73 @@ public class PublishClusterStateAction extends AbstractComponent {
|
|||
transportService.removeHandler(COMMIT_ACTION_NAME);
|
||||
}
|
||||
|
||||
public void publish(ClusterChangedEvent clusterChangedEvent, int minMasterNodes, final Discovery.AckListener ackListener) {
|
||||
final DiscoveryNodes nodes = clusterChangedEvent.state().nodes();
|
||||
Set<DiscoveryNode> nodesToPublishTo = new HashSet<>(nodes.size());
|
||||
DiscoveryNode localNode = nodes.localNode();
|
||||
final int totalMasterNodes = nodes.masterNodes().size();
|
||||
for (final DiscoveryNode node : nodes) {
|
||||
if (node.equals(localNode) == false) {
|
||||
nodesToPublishTo.add(node);
|
||||
/**
|
||||
* publishes a cluster change event to other nodes. if at least minMasterNodes acknowledge the change it is committed and will
|
||||
* be processed by the master and the other nodes.
|
||||
* <p/>
|
||||
* The method is guaranteed to throw a {@link Discovery.FailedToCommitClusterStateException} if the change is not committed and should be rejected.
|
||||
* Any other exception signals the something wrong happened but the change is committed.
|
||||
*/
|
||||
public void publish(final ClusterChangedEvent clusterChangedEvent, final int minMasterNodes, final Discovery.AckListener ackListener) throws Discovery.FailedToCommitClusterStateException {
|
||||
final DiscoveryNodes nodes;
|
||||
final SendingController sendingController;
|
||||
final Set<DiscoveryNode> nodesToPublishTo;
|
||||
final Map<Version, BytesReference> serializedStates;
|
||||
final Map<Version, BytesReference> serializedDiffs;
|
||||
final boolean sendFullVersion;
|
||||
try {
|
||||
nodes = clusterChangedEvent.state().nodes();
|
||||
nodesToPublishTo = new HashSet<>(nodes.size());
|
||||
DiscoveryNode localNode = nodes.localNode();
|
||||
final int totalMasterNodes = nodes.masterNodes().size();
|
||||
for (final DiscoveryNode node : nodes) {
|
||||
if (node.equals(localNode) == false) {
|
||||
nodesToPublishTo.add(node);
|
||||
}
|
||||
}
|
||||
sendFullVersion = !discoverySettings.getPublishDiff() || clusterChangedEvent.previousState() == null;
|
||||
serializedStates = Maps.newHashMap();
|
||||
serializedDiffs = Maps.newHashMap();
|
||||
|
||||
// we build these early as a best effort not to commit in the case of error.
|
||||
// sadly this is not water tight as it may that a failed diff based publishing to a node
|
||||
// will cause a full serialization based on an older version, which may fail after the
|
||||
// change has been committed.
|
||||
buildDiffAndSerializeStates(clusterChangedEvent.state(), clusterChangedEvent.previousState(),
|
||||
nodesToPublishTo, sendFullVersion, serializedStates, serializedDiffs);
|
||||
|
||||
final BlockingClusterStatePublishResponseHandler publishResponseHandler = new AckClusterStatePublishResponseHandler(nodesToPublishTo, ackListener);
|
||||
sendingController = new SendingController(clusterChangedEvent.state(), minMasterNodes, totalMasterNodes, publishResponseHandler);
|
||||
} catch (Throwable t) {
|
||||
throw new Discovery.FailedToCommitClusterStateException("unexpected error while preparing to publish", t);
|
||||
}
|
||||
|
||||
try {
|
||||
innerPublish(clusterChangedEvent, nodesToPublishTo, sendingController, sendFullVersion, serializedStates, serializedDiffs);
|
||||
} catch (Discovery.FailedToCommitClusterStateException t) {
|
||||
throw t;
|
||||
} catch (Throwable t) {
|
||||
// try to fail committing, in cause it's still on going
|
||||
sendingController.markAsFailed("unexpected error [" + t.getMessage() + "]");
|
||||
if (sendingController.isCommitted() == false) {
|
||||
// signal the change should be rejected
|
||||
throw new Discovery.FailedToCommitClusterStateException("unexpected error [{}]", t, t.getMessage());
|
||||
} else {
|
||||
throw t;
|
||||
}
|
||||
}
|
||||
publish(clusterChangedEvent, minMasterNodes, totalMasterNodes, nodesToPublishTo, new AckClusterStatePublishResponseHandler(nodesToPublishTo, ackListener));
|
||||
}
|
||||
|
||||
private void publish(final ClusterChangedEvent clusterChangedEvent, int minMasterNodes, int totalMasterNodes, final Set<DiscoveryNode> nodesToPublishTo,
|
||||
final BlockingClusterStatePublishResponseHandler publishResponseHandler) {
|
||||
|
||||
Map<Version, BytesReference> serializedStates = Maps.newHashMap();
|
||||
Map<Version, BytesReference> serializedDiffs = Maps.newHashMap();
|
||||
private void innerPublish(final ClusterChangedEvent clusterChangedEvent, final Set<DiscoveryNode> nodesToPublishTo,
|
||||
final SendingController sendingController, final boolean sendFullVersion,
|
||||
final Map<Version, BytesReference> serializedStates, final Map<Version, BytesReference> serializedDiffs) {
|
||||
|
||||
final ClusterState clusterState = clusterChangedEvent.state();
|
||||
final ClusterState previousState = clusterChangedEvent.previousState();
|
||||
final TimeValue publishTimeout = discoverySettings.getPublishTimeout();
|
||||
final boolean sendFullVersion = !discoverySettings.getPublishDiff() || previousState == null;
|
||||
final SendingController sendingController = new SendingController(clusterChangedEvent.state(), minMasterNodes, totalMasterNodes, publishResponseHandler);
|
||||
|
||||
final long publishingStartInNanos = System.nanoTime();
|
||||
|
||||
// we build these early as a best effort not to commit in the case of error.
|
||||
// sadly this is not water tight as it may that a failed diff based publishing to a node
|
||||
// will cause a full serialization based on an older version, which may fail after the
|
||||
// change has been committed.
|
||||
buildDiffAndSerializeStates(clusterState, previousState, nodesToPublishTo, sendFullVersion, serializedStates, serializedDiffs);
|
||||
|
||||
for (final DiscoveryNode node : nodesToPublishTo) {
|
||||
// try and serialize the cluster state once (or per version), so we don't serialize it
|
||||
// per node when we send it over the wire, compress it while we are at it...
|
||||
|
@ -141,6 +175,7 @@ public class PublishClusterStateAction extends AbstractComponent {
|
|||
|
||||
try {
|
||||
long timeLeftInNanos = Math.max(0, publishTimeout.nanos() - (System.nanoTime() - publishingStartInNanos));
|
||||
final BlockingClusterStatePublishResponseHandler publishResponseHandler = sendingController.getPublishResponseHandler();
|
||||
sendingController.setPublishingTimedOut(!publishResponseHandler.awaitAllNodes(TimeValue.timeValueNanos(timeLeftInNanos)));
|
||||
if (sendingController.getPublishingTimedOut()) {
|
||||
DiscoveryNode[] pendingNodes = publishResponseHandler.pendingNodes();
|
||||
|
@ -335,12 +370,13 @@ public class PublishClusterStateAction extends AbstractComponent {
|
|||
}
|
||||
|
||||
// package private for testing
|
||||
|
||||
/**
|
||||
* does simple sanity check of the incoming cluster state. Throws an exception on rejections.
|
||||
*/
|
||||
void validateIncomingState(ClusterState incomingState, ClusterState lastSeenClusterState) {
|
||||
final ClusterName incomingClusterName = incomingState.getClusterName();
|
||||
if (!incomingClusterName.equals(PublishClusterStateAction.this.clusterName)) {
|
||||
if (!incomingClusterName.equals(this.clusterName)) {
|
||||
logger.warn("received cluster state from [{}] which is also master but with a different cluster name [{}]", incomingState.nodes().masterNode(), incomingClusterName);
|
||||
throw new IllegalStateException("received state from a node that is not part of the cluster");
|
||||
}
|
||||
|
@ -443,17 +479,6 @@ public class PublishClusterStateAction extends AbstractComponent {
|
|||
}
|
||||
|
||||
|
||||
public static class FailedToCommitException extends ElasticsearchException {
|
||||
|
||||
public FailedToCommitException(StreamInput in) throws IOException {
|
||||
super(in);
|
||||
}
|
||||
|
||||
public FailedToCommitException(String msg, Object... args) {
|
||||
super(msg, args);
|
||||
}
|
||||
}
|
||||
|
||||
class SendingController {
|
||||
|
||||
private final ClusterState clusterState;
|
||||
|
@ -481,7 +506,7 @@ public class PublishClusterStateAction extends AbstractComponent {
|
|||
this.neededMastersToCommit = Math.max(0, minMasterNodes - 1); // we are one of the master nodes
|
||||
this.pendingMasterNodes = totalMasterNodes - 1;
|
||||
if (this.neededMastersToCommit > this.pendingMasterNodes) {
|
||||
throw new FailedToCommitException("not enough masters to ack sent cluster state. [{}] needed , have [{}]", neededMastersToCommit, pendingMasterNodes);
|
||||
throw new Discovery.FailedToCommitClusterStateException("not enough masters to ack sent cluster state. [{}] needed , have [{}]", neededMastersToCommit, pendingMasterNodes);
|
||||
}
|
||||
this.committed = neededMastersToCommit == 0;
|
||||
this.committedOrFailed = committed;
|
||||
|
@ -493,14 +518,14 @@ public class PublishClusterStateAction extends AbstractComponent {
|
|||
try {
|
||||
timedout = committedOrFailedLatch.await(commitTimeout.millis(), TimeUnit.MILLISECONDS) == false;
|
||||
} catch (InterruptedException e) {
|
||||
|
||||
// the commit check bellow will either translate to an exception or we are committed and we can safely continue
|
||||
}
|
||||
|
||||
if (timedout) {
|
||||
markAsFailed("timed out waiting for commit (commit timeout [" + commitTimeout + "]");
|
||||
}
|
||||
if (isCommitted() == false) {
|
||||
throw new FailedToCommitException("{} enough masters to ack sent cluster state. [{}] left",
|
||||
throw new Discovery.FailedToCommitClusterStateException("{} enough masters to ack sent cluster state. [{}] left",
|
||||
timedout ? "timed out while waiting for" : "failed to get", neededMastersToCommit);
|
||||
}
|
||||
}
|
||||
|
@ -542,7 +567,7 @@ public class PublishClusterStateAction extends AbstractComponent {
|
|||
synchronized private void onMasterNodeDone(DiscoveryNode node) {
|
||||
pendingMasterNodes--;
|
||||
if (pendingMasterNodes == 0 && neededMastersToCommit > 0) {
|
||||
markAsFailed("All master nodes acked or failed but [" + neededMastersToCommit + "] acks are still needed");
|
||||
markAsFailed("no more pending master nodes, but [" + neededMastersToCommit + "] acks are still needed");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,16 +27,15 @@ import org.elasticsearch.cluster.metadata.MetaData;
|
|||
import org.elasticsearch.common.Priority;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.discovery.Discovery;
|
||||
import org.elasticsearch.discovery.DiscoverySettings;
|
||||
import org.elasticsearch.discovery.zen.ZenDiscovery;
|
||||
import org.elasticsearch.discovery.zen.elect.ElectMasterService;
|
||||
import org.elasticsearch.discovery.zen.fd.FaultDetection;
|
||||
import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
|
||||
import org.elasticsearch.test.disruption.NetworkDelaysPartition;
|
||||
import org.elasticsearch.test.disruption.NetworkUnresponsivePartition;
|
||||
import org.elasticsearch.test.junit.annotations.TestLogging;
|
||||
import org.elasticsearch.test.transport.MockTransportService;
|
||||
import org.junit.Test;
|
||||
|
@ -393,7 +392,7 @@ public class MinimumMasterNodesIT extends ESIntegTestCase {
|
|||
logger.debug("--> waiting for cluster state to be processed/rejected");
|
||||
latch.await();
|
||||
|
||||
assertThat(failure.get(), instanceOf(PublishClusterStateAction.FailedToCommitException.class));
|
||||
assertThat(failure.get(), instanceOf(Discovery.FailedToCommitClusterStateException.class));
|
||||
assertBusy(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
|
|
|
@ -354,7 +354,7 @@ public class PublishClusterStateActionTests extends ESTestCase {
|
|||
|
||||
|
||||
/**
|
||||
* Test concurrent publishing works correctly (although not strictly required, it's a good testamne
|
||||
* Test not waiting publishing works correctly (i.e., publishing times out)
|
||||
*/
|
||||
@Test
|
||||
public void testSimultaneousClusterStatePublishing() throws Exception {
|
||||
|
@ -447,9 +447,9 @@ public class PublishClusterStateActionTests extends ESTestCase {
|
|||
try {
|
||||
publishStateAndWait(nodeA.action, unserializableClusterState, previousClusterState);
|
||||
fail("cluster state published despite of diff errors");
|
||||
} catch (ElasticsearchException e) {
|
||||
} catch (Discovery.FailedToCommitClusterStateException e) {
|
||||
assertThat(e.getCause(), notNullValue());
|
||||
assertThat(e.getCause().getMessage(), containsString("Simulated"));
|
||||
assertThat(e.getCause().getMessage(), containsString("failed to serialize"));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -475,7 +475,7 @@ public class PublishClusterStateActionTests extends ESTestCase {
|
|||
try {
|
||||
publishState(master.action, clusterState, previousState, masterNodes + randomIntBetween(1, 5));
|
||||
fail("cluster state publishing didn't fail despite of not having enough nodes");
|
||||
} catch (PublishClusterStateAction.FailedToCommitException expected) {
|
||||
} catch (Discovery.FailedToCommitClusterStateException expected) {
|
||||
logger.debug("failed to publish as expected", expected);
|
||||
}
|
||||
}
|
||||
|
@ -554,7 +554,7 @@ public class PublishClusterStateActionTests extends ESTestCase {
|
|||
if (expectingToCommit == false) {
|
||||
fail("cluster state publishing didn't fail despite of not have enough nodes");
|
||||
}
|
||||
} catch (PublishClusterStateAction.FailedToCommitException exception) {
|
||||
} catch (Discovery.FailedToCommitClusterStateException exception) {
|
||||
logger.debug("failed to publish as expected", exception);
|
||||
if (expectingToCommit) {
|
||||
throw exception;
|
||||
|
@ -697,7 +697,7 @@ public class PublishClusterStateActionTests extends ESTestCase {
|
|||
try {
|
||||
publishState(master.action, state, master.clusterState, 2).await(1, TimeUnit.HOURS);
|
||||
success = true;
|
||||
} catch (PublishClusterStateAction.FailedToCommitException OK) {
|
||||
} catch (Discovery.FailedToCommitClusterStateException OK) {
|
||||
success = false;
|
||||
}
|
||||
logger.debug("--> publishing [{}], verifying...", success ? "succeeded" : "failed");
|
||||
|
|
Loading…
Reference in New Issue