Fixing 503 Service Unavailable errors during fetch phase (#39086)

When an `EsRejectedExecutionException` is thrown on the coordinating node while trying to fetch hits, the resulting exception holds no shard failures, hence `503` is used as the response status code. In that case, `429` should be returned instead. More generally, whenever there are no shard failures, the status code should be taken from the cause if one is available, instead of blindly returning `503` as we currently do.

Closes #38586
This commit is contained in:
Ankit Jain 2019-03-08 08:08:55 -08:00 committed by Luca Cavanna
parent 8925a2c6c2
commit 471aa6a16a
2 changed files with 40 additions and 2 deletions

View File

@ -85,8 +85,9 @@ public class SearchPhaseExecutionException extends ElasticsearchException {
@Override
public RestStatus status() {
if (shardFailures.length == 0) {
// if no successful shards, it means no active shards, so just return SERVICE_UNAVAILABLE
return RestStatus.SERVICE_UNAVAILABLE;
// no shard failures were recorded: the failure can be due to an EsRejectedExecutionException during the fetch phase
// on the coordinating node, so take the status from the cause instead of blindly returning SERVICE_UNAVAILABLE
return getCause() == null ? RestStatus.SERVICE_UNAVAILABLE : ExceptionsHelper.status(getCause());
}
RestStatus status = shardFailures[0].status();
if (shardFailures.length > 1) {

View File

@ -25,6 +25,7 @@ import org.elasticsearch.action.TimestampParsingException;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContent;
import org.elasticsearch.common.xcontent.XContentParser;
@ -32,6 +33,7 @@ import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.shard.IndexShardClosedException;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.InvalidIndexTemplateException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.test.ESTestCase;
@ -121,4 +123,39 @@ public class SearchPhaseExecutionExceptionTests extends ESTestCase {
// SearchPhaseExecutionException has no cause field
assertNull(parsedException.getCause());
}
/**
 * Builds a {@link SearchPhaseExecutionException} with an empty shard-failure array and an
 * {@link EsRejectedExecutionException} as its cause, and expects the reported status to be
 * {@link RestStatus#TOO_MANY_REQUESTS}.
 */
public void testPhaseFailureWithoutSearchShardFailure() {
    final ShardSearchFailure[] searchShardFailures = new ShardSearchFailure[0];
    final String phase = randomFrom("fetch", "search", "other");
    final SearchPhaseExecutionException exception = new SearchPhaseExecutionException(phase, "unexpected failures",
        new EsRejectedExecutionException("ES rejected execution of fetch phase"), searchShardFailures);
    // assertEquals takes (expected, actual); keeping that order makes the failure message read correctly
    assertEquals(RestStatus.TOO_MANY_REQUESTS, exception.status());
}
/**
 * Builds a {@link SearchPhaseExecutionException} with an empty shard-failure array and a
 * {@code null} cause, and expects the reported status to be
 * {@link RestStatus#SERVICE_UNAVAILABLE}.
 */
public void testPhaseFailureWithoutSearchShardFailureAndCause() {
    final ShardSearchFailure[] searchShardFailures = new ShardSearchFailure[0];
    final String phase = randomFrom("fetch", "search", "other");
    // no cause is supplied on purpose: this exercises the "no shard failures, no cause" path
    final SearchPhaseExecutionException exception =
        new SearchPhaseExecutionException(phase, "unexpected failures", (Throwable) null, searchShardFailures);
    // assertEquals takes (expected, actual); keeping that order makes the failure message read correctly
    assertEquals(RestStatus.SERVICE_UNAVAILABLE, exception.status());
}
/**
 * Builds a {@link SearchPhaseExecutionException} carrying between one and five shard failures
 * (each wrapping either a {@link ParsingException} or an {@link InvalidIndexTemplateException})
 * together with an {@link EsRejectedExecutionException} cause, and expects the reported status
 * to be {@link RestStatus#BAD_REQUEST} rather than the status the cause alone would suggest.
 */
public void testPhaseFailureWithSearchShardFailure() {
    final ShardSearchFailure[] shardSearchFailures = new ShardSearchFailure[randomIntBetween(1, 5)];
    for (int i = 0; i < shardSearchFailures.length; i++) {
        final Exception shardCause = randomFrom(
            new ParsingException(1, 2, "foobar", null),
            new InvalidIndexTemplateException("foo", "bar")
        );
        final SearchShardTarget target = new SearchShardTarget("node_" + i,
            new ShardId("test", "_na_", i), null, OriginalIndices.NONE);
        shardSearchFailures[i] = new ShardSearchFailure(shardCause, target);
    }

    final String phase = randomFrom("fetch", "search", "other");
    final SearchPhaseExecutionException exception = new SearchPhaseExecutionException(phase, "unexpected failures",
        new EsRejectedExecutionException("ES rejected execution of fetch phase"), shardSearchFailures);
    // assertEquals takes (expected, actual); keeping that order makes the failure message read correctly
    assertEquals(RestStatus.BAD_REQUEST, exception.status());
}
}