Cross-cluster search: preserve cluster alias in shard failures (#32608)

When some remote clusters return shard failures as part of a cross-cluster search request, the cluster alias currently gets lost. As a result, if the shard failures are all caused by the same error, and against indices belonging to different clusters, but with the same index name, only one failure gets returned as part of the search response, meaning that failures are grouped by index name, ignoring the cluster alias.

With this commit we make sure that `ShardSearchFailure` returns the cluster alias as part of the index name. Also, we set the fully qualfied index name when creating a `QueryShardException`. That way shard failures are grouped by cluster:index. Such fixes should cover at least most of the cases where either 1) the shard target is set but we don't have the index in the cause (we were previously reading it only from the cause that did not have the cluster alias) 2) the shard target is missing but if the cause is a `QueryShardException` the cluster alias does not get lost.

We also prevent NPE in case the failure cause is not set and test such scenario.
This commit is contained in:
Luca Cavanna 2018-08-06 11:48:50 +02:00 committed by GitHub
parent 3cf08326ab
commit 826399f9fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 336 additions and 35 deletions

View File

@ -39,6 +39,7 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
@ -278,7 +279,7 @@ public final class ExceptionsHelper {
List<ShardOperationFailedException> uniqueFailures = new ArrayList<>();
Set<GroupBy> reasons = new HashSet<>();
for (ShardOperationFailedException failure : failures) {
GroupBy reason = new GroupBy(failure.getCause());
GroupBy reason = new GroupBy(failure);
if (reasons.contains(reason) == false) {
reasons.add(reason);
uniqueFailures.add(failure);
@ -287,46 +288,52 @@ public final class ExceptionsHelper {
return uniqueFailures.toArray(new ShardOperationFailedException[0]);
}
static class GroupBy {
private static class GroupBy {
final String reason;
final String index;
final Class<? extends Throwable> causeType;
GroupBy(Throwable t) {
if (t instanceof ElasticsearchException) {
final Index index = ((ElasticsearchException) t).getIndex();
if (index != null) {
this.index = index.getName();
} else {
this.index = null;
GroupBy(ShardOperationFailedException failure) {
Throwable cause = failure.getCause();
//the index name from the failure contains the cluster alias when using CCS. Ideally failures should be grouped by
//index name and cluster alias. That's why the failure index name has the precedence over the one coming from the cause,
//which does not include the cluster alias.
String indexName = failure.index();
if (indexName == null) {
if (cause instanceof ElasticsearchException) {
final Index index = ((ElasticsearchException) cause).getIndex();
if (index != null) {
indexName = index.getName();
}
}
} else {
index = null;
}
reason = t.getMessage();
causeType = t.getClass();
this.index = indexName;
if (cause == null) {
this.reason = failure.reason();
this.causeType = null;
} else {
this.reason = cause.getMessage();
this.causeType = cause.getClass();
}
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
GroupBy groupBy = (GroupBy) o;
if (!causeType.equals(groupBy.causeType)) return false;
if (index != null ? !index.equals(groupBy.index) : groupBy.index != null) return false;
if (reason != null ? !reason.equals(groupBy.reason) : groupBy.reason != null) return false;
return true;
return Objects.equals(reason, groupBy.reason) &&
Objects.equals(index, groupBy.index) &&
Objects.equals(causeType, groupBy.causeType);
}
@Override
public int hashCode() {
int result = reason != null ? reason.hashCode() : 0;
result = 31 * result + (index != null ? index.hashCode() : 0);
result = 31 * result + causeType.hashCode();
return result;
return Objects.hash(reason, index, causeType);
}
}
}

View File

@ -34,6 +34,7 @@ import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchException;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.transport.RemoteClusterAware;
import java.io.IOException;
@ -66,7 +67,7 @@ public class ShardSearchFailure implements ShardOperationFailedException {
public ShardSearchFailure(Exception e, @Nullable SearchShardTarget shardTarget) {
final Throwable actual = ExceptionsHelper.unwrapCause(e);
if (actual != null && actual instanceof SearchException) {
if (actual instanceof SearchException) {
this.shardTarget = ((SearchException) actual).shard();
} else if (shardTarget != null) {
this.shardTarget = shardTarget;
@ -105,7 +106,7 @@ public class ShardSearchFailure implements ShardOperationFailedException {
@Override
public String index() {
if (shardTarget != null) {
return shardTarget.getIndex();
return shardTarget.getFullyQualifiedIndexName();
}
return null;
}
@ -186,6 +187,7 @@ public class ShardSearchFailure implements ShardOperationFailedException {
String currentFieldName = null;
int shardId = -1;
String indexName = null;
String clusterAlias = null;
String nodeId = null;
ElasticsearchException exception = null;
while((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
@ -196,6 +198,11 @@ public class ShardSearchFailure implements ShardOperationFailedException {
shardId = parser.intValue();
} else if (INDEX_FIELD.equals(currentFieldName)) {
indexName = parser.text();
int indexOf = indexName.indexOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR);
if (indexOf > 0) {
clusterAlias = indexName.substring(0, indexOf);
indexName = indexName.substring(indexOf + 1);
}
} else if (NODE_FIELD.equals(currentFieldName)) {
nodeId = parser.text();
} else {
@ -214,7 +221,7 @@ public class ShardSearchFailure implements ShardOperationFailedException {
SearchShardTarget searchShardTarget = null;
if (nodeId != null) {
searchShardTarget = new SearchShardTarget(nodeId,
new ShardId(new Index(indexName, IndexMetaData.INDEX_UUID_NA_VALUE), shardId), null, OriginalIndices.NONE);
new ShardId(new Index(indexName, IndexMetaData.INDEX_UUID_NA_VALUE), shardId), clusterAlias, OriginalIndices.NONE);
}
return new ShardSearchFailure(exception, searchShardTarget);
}

View File

@ -38,7 +38,7 @@ public class QueryShardException extends ElasticsearchException {
public QueryShardException(QueryShardContext context, String msg, Throwable cause, Object... args) {
super(msg, cause, args);
setIndex(context.index());
setIndex(context.getFullyQualifiedIndexName());
}
/**

View File

@ -19,8 +19,6 @@
package org.elasticsearch.search;
import java.io.IOException;
import org.elasticsearch.Version;
import org.elasticsearch.action.OriginalIndices;
import org.elasticsearch.common.Nullable;
@ -32,6 +30,8 @@ import org.elasticsearch.index.Index;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.transport.RemoteClusterAware;
import java.io.IOException;
/**
* The target that the search request was executed on.
*/
@ -96,6 +96,13 @@ public final class SearchShardTarget implements Writeable, Comparable<SearchShar
return clusterAlias;
}
/**
* Returns the fully qualified index name, including the cluster alias.
*/
public String getFullyQualifiedIndexName() {
return RemoteClusterAware.buildRemoteIndexName(getClusterAlias(), getIndex());
}
@Override
public int compareTo(SearchShardTarget o) {
int i = shardId.getIndexName().compareTo(o.getIndex());

View File

@ -20,15 +20,27 @@
package org.elasticsearch;
import org.apache.commons.codec.DecoderException;
import org.elasticsearch.action.OriginalIndices;
import org.elasticsearch.action.ShardOperationFailedException;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.transport.RemoteClusterAware;
import java.util.Optional;
import static org.elasticsearch.ExceptionsHelper.MAX_ITERATIONS;
import static org.elasticsearch.ExceptionsHelper.maybeError;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.CoreMatchers.nullValue;
public class ExceptionsHelperTests extends ESTestCase {
@ -91,4 +103,107 @@ public class ExceptionsHelperTests extends ESTestCase {
assertThat(ExceptionsHelper.status(new EsRejectedExecutionException("rejected")), equalTo(RestStatus.TOO_MANY_REQUESTS));
}
public void testGroupBy() {
ShardOperationFailedException[] failures = new ShardOperationFailedException[]{
createShardFailureParsingException("error", "node0", "index", 0, null),
createShardFailureParsingException("error", "node1", "index", 1, null),
createShardFailureParsingException("error", "node2", "index2", 2, null),
createShardFailureParsingException("error", "node0", "index", 0, "cluster1"),
createShardFailureParsingException("error", "node1", "index", 1, "cluster1"),
createShardFailureParsingException("error", "node2", "index", 2, "cluster1"),
createShardFailureParsingException("error", "node0", "index", 0, "cluster2"),
createShardFailureParsingException("error", "node1", "index", 1, "cluster2"),
createShardFailureParsingException("error", "node2", "index", 2, "cluster2"),
createShardFailureParsingException("another error", "node2", "index", 2, "cluster2")
};
ShardOperationFailedException[] groupBy = ExceptionsHelper.groupBy(failures);
assertThat(groupBy.length, equalTo(5));
String[] expectedIndices = new String[]{"index", "index2", "cluster1:index", "cluster2:index", "cluster2:index"};
String[] expectedErrors = new String[]{"error", "error", "error", "error", "another error"};
int i = 0;
for (ShardOperationFailedException shardOperationFailedException : groupBy) {
assertThat(shardOperationFailedException.getCause().getMessage(), equalTo(expectedErrors[i]));
assertThat(shardOperationFailedException.index(), equalTo(expectedIndices[i++]));
}
}
private static ShardSearchFailure createShardFailureParsingException(String error, String nodeId,
String index, int shardId, String clusterAlias) {
ParsingException ex = new ParsingException(0, 0, error, new IllegalArgumentException("some bad argument"));
ex.setIndex(index);
return new ShardSearchFailure(ex, createSearchShardTarget(nodeId, shardId, index, clusterAlias));
}
private static SearchShardTarget createSearchShardTarget(String nodeId, int shardId, String index, String clusterAlias) {
return new SearchShardTarget(nodeId,
new ShardId(new Index(index, IndexMetaData.INDEX_UUID_NA_VALUE), shardId), clusterAlias, OriginalIndices.NONE);
}
public void testGroupByNullTarget() {
ShardOperationFailedException[] failures = new ShardOperationFailedException[] {
createShardFailureQueryShardException("error", "index", null),
createShardFailureQueryShardException("error", "index", null),
createShardFailureQueryShardException("error", "index", null),
createShardFailureQueryShardException("error", "index", "cluster1"),
createShardFailureQueryShardException("error", "index", "cluster1"),
createShardFailureQueryShardException("error", "index", "cluster1"),
createShardFailureQueryShardException("error", "index", "cluster2"),
createShardFailureQueryShardException("error", "index", "cluster2"),
createShardFailureQueryShardException("error", "index2", null),
createShardFailureQueryShardException("another error", "index2", null),
};
ShardOperationFailedException[] groupBy = ExceptionsHelper.groupBy(failures);
assertThat(groupBy.length, equalTo(5));
String[] expectedIndices = new String[]{"index", "cluster1:index", "cluster2:index", "index2", "index2"};
String[] expectedErrors = new String[]{"error", "error", "error", "error", "another error"};
int i = 0;
for (ShardOperationFailedException shardOperationFailedException : groupBy) {
assertThat(shardOperationFailedException.index(), nullValue());
assertThat(shardOperationFailedException.getCause(), instanceOf(ElasticsearchException.class));
ElasticsearchException elasticsearchException = (ElasticsearchException) shardOperationFailedException.getCause();
assertThat(elasticsearchException.getMessage(), equalTo(expectedErrors[i]));
assertThat(elasticsearchException.getIndex().getName(), equalTo(expectedIndices[i++]));
}
}
private static ShardSearchFailure createShardFailureQueryShardException(String error, String indexName, String clusterAlias) {
Index index = new Index(RemoteClusterAware.buildRemoteIndexName(clusterAlias, indexName), "uuid");
QueryShardException queryShardException = new QueryShardException(index, error, new IllegalArgumentException("parse error"));
return new ShardSearchFailure(queryShardException, null);
}
public void testGroupByNullCause() {
ShardOperationFailedException[] failures = new ShardOperationFailedException[] {
new ShardSearchFailure("error", createSearchShardTarget("node0", 0, "index", null)),
new ShardSearchFailure("error", createSearchShardTarget("node1", 1, "index", null)),
new ShardSearchFailure("error", createSearchShardTarget("node1", 1, "index2", null)),
new ShardSearchFailure("error", createSearchShardTarget("node2", 2, "index", "cluster1")),
new ShardSearchFailure("error", createSearchShardTarget("node1", 1, "index", "cluster1")),
new ShardSearchFailure("a different error", createSearchShardTarget("node3", 3, "index", "cluster1"))
};
ShardOperationFailedException[] groupBy = ExceptionsHelper.groupBy(failures);
assertThat(groupBy.length, equalTo(4));
String[] expectedIndices = new String[]{"index", "index2", "cluster1:index", "cluster1:index"};
String[] expectedErrors = new String[]{"error", "error", "error", "a different error"};
int i = 0;
for (ShardOperationFailedException shardOperationFailedException : groupBy) {
assertThat(shardOperationFailedException.reason(), equalTo(expectedErrors[i]));
assertThat(shardOperationFailedException.index(), equalTo(expectedIndices[i++]));
}
}
public void testGroupByNullIndex() {
ShardOperationFailedException[] failures = new ShardOperationFailedException[] {
new ShardSearchFailure("error", null),
new ShardSearchFailure(new IllegalArgumentException("error")),
new ShardSearchFailure(new ParsingException(0, 0, "error", null)),
};
ShardOperationFailedException[] groupBy = ExceptionsHelper.groupBy(failures);
assertThat(groupBy.length, equalTo(3));
}
}

View File

@ -45,8 +45,9 @@ public class ShardSearchFailureTests extends ESTestCase {
if (randomBoolean()) {
String nodeId = randomAlphaOfLengthBetween(5, 10);
String indexName = randomAlphaOfLengthBetween(5, 10);
String clusterAlias = randomBoolean() ? randomAlphaOfLengthBetween(5, 10) : null;
searchShardTarget = new SearchShardTarget(nodeId,
new ShardId(new Index(indexName, IndexMetaData.INDEX_UUID_NA_VALUE), randomInt()), null, null);
new ShardId(new Index(indexName, IndexMetaData.INDEX_UUID_NA_VALUE), randomInt()), clusterAlias, OriginalIndices.NONE);
}
return new ShardSearchFailure(ex, searchShardTarget);
}
@ -115,4 +116,22 @@ public class ShardSearchFailureTests extends ESTestCase {
+ "}",
xContent.utf8ToString());
}
public void testToXContentWithClusterAlias() throws IOException {
ShardSearchFailure failure = new ShardSearchFailure(new ParsingException(0, 0, "some message", null),
new SearchShardTarget("nodeId", new ShardId(new Index("indexName", "indexUuid"), 123), "cluster1", OriginalIndices.NONE));
BytesReference xContent = toXContent(failure, XContentType.JSON, randomBoolean());
assertEquals(
"{\"shard\":123,"
+ "\"index\":\"cluster1:indexName\","
+ "\"node\":\"nodeId\","
+ "\"reason\":{"
+ "\"type\":\"parsing_exception\","
+ "\"reason\":\"some message\","
+ "\"line\":0,"
+ "\"col\":0"
+ "}"
+ "}",
xContent.utf8ToString());
}
}

View File

@ -20,22 +20,33 @@
package org.elasticsearch.rest.action;
import com.fasterxml.jackson.core.io.JsonEOFException;
import java.util.Arrays;
import org.elasticsearch.action.OriginalIndices;
import org.elasticsearch.action.ShardOperationFailedException;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.test.ESTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import java.io.IOException;
import java.util.Arrays;
import static java.util.Collections.emptyList;
import static org.hamcrest.CoreMatchers.equalTo;
public class RestActionsTests extends ESTestCase {
@ -98,9 +109,144 @@ public class RestActionsTests extends ESTestCase {
}
}
public void testBuildBroadcastShardsHeader() throws IOException {
ShardOperationFailedException[] failures = new ShardOperationFailedException[]{
createShardFailureParsingException("node0", 0, null),
createShardFailureParsingException("node1", 1, null),
createShardFailureParsingException("node2", 2, null),
createShardFailureParsingException("node0", 0, "cluster1"),
createShardFailureParsingException("node1", 1, "cluster1"),
createShardFailureParsingException("node2", 2, "cluster1"),
createShardFailureParsingException("node0", 0, "cluster2"),
createShardFailureParsingException("node1", 1, "cluster2"),
createShardFailureParsingException("node2", 2, "cluster2")
};
XContentBuilder builder = JsonXContent.contentBuilder();
builder.prettyPrint();
builder.startObject();
RestActions.buildBroadcastShardsHeader(builder, ToXContent.EMPTY_PARAMS, 12, 3, 0, 9, failures);
builder.endObject();
assertThat(Strings.toString(builder), equalTo("{\n" +
" \"_shards\" : {\n" +
" \"total\" : 12,\n" +
" \"successful\" : 3,\n" +
" \"skipped\" : 0,\n" +
" \"failed\" : 9,\n" +
" \"failures\" : [\n" +
" {\n" +
" \"shard\" : 0,\n" +
" \"index\" : \"index\",\n" +
" \"node\" : \"node0\",\n" +
" \"reason\" : {\n" +
" \"type\" : \"parsing_exception\",\n" +
" \"reason\" : \"error\",\n" +
" \"index_uuid\" : \"_na_\",\n" +
" \"index\" : \"index\",\n" +
" \"line\" : 0,\n" +
" \"col\" : 0,\n" +
" \"caused_by\" : {\n" +
" \"type\" : \"illegal_argument_exception\",\n" +
" \"reason\" : \"some bad argument\"\n" +
" }\n" +
" }\n" +
" },\n" +
" {\n" +
" \"shard\" : 0,\n" +
" \"index\" : \"cluster1:index\",\n" +
" \"node\" : \"node0\",\n" +
" \"reason\" : {\n" +
" \"type\" : \"parsing_exception\",\n" +
" \"reason\" : \"error\",\n" +
" \"index_uuid\" : \"_na_\",\n" +
" \"index\" : \"index\",\n" +
" \"line\" : 0,\n" +
" \"col\" : 0,\n" +
" \"caused_by\" : {\n" +
" \"type\" : \"illegal_argument_exception\",\n" +
" \"reason\" : \"some bad argument\"\n" +
" }\n" +
" }\n" +
" },\n" +
" {\n" +
" \"shard\" : 0,\n" +
" \"index\" : \"cluster2:index\",\n" +
" \"node\" : \"node0\",\n" +
" \"reason\" : {\n" +
" \"type\" : \"parsing_exception\",\n" +
" \"reason\" : \"error\",\n" +
" \"index_uuid\" : \"_na_\",\n" +
" \"index\" : \"index\",\n" +
" \"line\" : 0,\n" +
" \"col\" : 0,\n" +
" \"caused_by\" : {\n" +
" \"type\" : \"illegal_argument_exception\",\n" +
" \"reason\" : \"some bad argument\"\n" +
" }\n" +
" }\n" +
" }\n" +
" ]\n" +
" }\n" +
"}"));
}
private static ShardSearchFailure createShardFailureParsingException(String nodeId, int shardId, String clusterAlias) {
String index = "index";
ParsingException ex = new ParsingException(0, 0, "error", new IllegalArgumentException("some bad argument"));
ex.setIndex(index);
return new ShardSearchFailure(ex, createSearchShardTarget(nodeId, shardId, index, clusterAlias));
}
private static SearchShardTarget createSearchShardTarget(String nodeId, int shardId, String index, String clusterAlias) {
return new SearchShardTarget(nodeId,
new ShardId(new Index(index, IndexMetaData.INDEX_UUID_NA_VALUE), shardId), clusterAlias, OriginalIndices.NONE);
}
public void testBuildBroadcastShardsHeaderNullCause() throws Exception {
ShardOperationFailedException[] failures = new ShardOperationFailedException[] {
new ShardSearchFailure("error", createSearchShardTarget("node0", 0, "index", null)),
new ShardSearchFailure("error", createSearchShardTarget("node1", 1, "index", null)),
new ShardSearchFailure("error", createSearchShardTarget("node2", 2, "index", "cluster1")),
new ShardSearchFailure("error", createSearchShardTarget("node1", 1, "index", "cluster1")),
new ShardSearchFailure("a different error", createSearchShardTarget("node3", 3, "index", "cluster1"))
};
XContentBuilder builder = JsonXContent.contentBuilder();
builder.prettyPrint();
builder.startObject();
RestActions.buildBroadcastShardsHeader(builder, ToXContent.EMPTY_PARAMS, 12, 3, 0, 9, failures);
builder.endObject();
//TODO the reason is not printed out, as a follow-up we should probably either print it out when the cause is null,
//or even better enforce that the cause can't be null
assertThat(Strings.toString(builder), equalTo("{\n" +
" \"_shards\" : {\n" +
" \"total\" : 12,\n" +
" \"successful\" : 3,\n" +
" \"skipped\" : 0,\n" +
" \"failed\" : 9,\n" +
" \"failures\" : [\n" +
" {\n" +
" \"shard\" : 0,\n" +
" \"index\" : \"index\",\n" +
" \"node\" : \"node0\"\n" +
" },\n" +
" {\n" +
" \"shard\" : 2,\n" +
" \"index\" : \"cluster1:index\",\n" +
" \"node\" : \"node2\"\n" +
" },\n" +
" {\n" +
" \"shard\" : 3,\n" +
" \"index\" : \"cluster1:index\",\n" +
" \"node\" : \"node3\"\n" +
" }\n" +
" ]\n" +
" }\n" +
"}"));
}
@Override
protected NamedXContentRegistry xContentRegistry() {
return xContentRegistry;
}
}