mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 10:25:15 +00:00
Added node closed exception to the retryable remote exceptions as it's possible to run into this exception instead of a connect exception when the master node is just shutting down but still responding to requests.
This commit is contained in:
parent
ef0a933541
commit
af1ff52e70
@ -46,6 +46,7 @@ import org.elasticsearch.node.NodeClosedException;
|
||||
import org.elasticsearch.tasks.Task;
|
||||
import org.elasticsearch.threadpool.ThreadPool;
|
||||
import org.elasticsearch.transport.ConnectTransportException;
|
||||
import org.elasticsearch.transport.RemoteTransportException;
|
||||
import org.elasticsearch.transport.TransportException;
|
||||
import org.elasticsearch.transport.TransportService;
|
||||
|
||||
@ -180,7 +181,8 @@ public abstract class TransportMasterNodeAction<Request extends MasterNodeReques
|
||||
@Override
|
||||
public void handleException(final TransportException exp) {
|
||||
Throwable cause = exp.unwrapCause();
|
||||
if (cause instanceof ConnectTransportException) {
|
||||
if (cause instanceof ConnectTransportException ||
|
||||
(exp instanceof RemoteTransportException && cause instanceof NodeClosedException)) {
|
||||
// we want to retry here a bit to see if a new master is elected
|
||||
logger.debug("connection exception while trying to forward request with action name [{}] to " +
|
||||
"master node [{}], scheduling a retry. Error: [{}]",
|
||||
|
@ -44,6 +44,7 @@ import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
import org.elasticsearch.discovery.MasterNotDiscoveredException;
|
||||
import org.elasticsearch.node.NodeClosedException;
|
||||
import org.elasticsearch.rest.RestStatus;
|
||||
import org.elasticsearch.tasks.Task;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
@ -391,7 +392,8 @@ public class TransportMasterNodeActionTests extends ESTestCase {
|
||||
assertThat(capturedRequest.action, equalTo("internal:testAction"));
|
||||
|
||||
if (rejoinSameMaster) {
|
||||
transport.handleRemoteError(capturedRequest.requestId, new ConnectTransportException(masterNode, "Fake error"));
|
||||
transport.handleRemoteError(capturedRequest.requestId,
|
||||
randomBoolean() ? new ConnectTransportException(masterNode, "Fake error") : new NodeClosedException(masterNode));
|
||||
assertFalse(listener.isDone());
|
||||
if (randomBoolean()) {
|
||||
// simulate master node removal
|
||||
|
@ -28,7 +28,10 @@ import org.elasticsearch.cluster.service.ClusterService;
|
||||
import org.elasticsearch.common.Priority;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
import org.elasticsearch.test.InternalTestCluster;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
@ -294,4 +297,17 @@ public class ClusterHealthIT extends ESIntegTestCase {
|
||||
assertFalse(healthResponseFuture.get().isTimedOut());
|
||||
}
|
||||
|
||||
public void testHealthOnMasterFailover() throws Exception {
|
||||
final String node = internalCluster().startDataOnlyNode();
|
||||
final List<ActionFuture<ClusterHealthResponse>> responseFutures = new ArrayList<>();
|
||||
// Run a few health requests concurrent to master fail-overs against a data-node to make sure master failover is handled
|
||||
// without exceptions
|
||||
for (int i = 0; i < 20; ++i) {
|
||||
responseFutures.add(client(node).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).execute());
|
||||
internalCluster().restartNode(internalCluster().getMasterName(), InternalTestCluster.EMPTY_CALLBACK);
|
||||
}
|
||||
for (ActionFuture<ClusterHealthResponse> responseFuture : responseFutures) {
|
||||
assertSame(responseFuture.get().getStatus(), ClusterHealthStatus.GREEN);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user