Make it possible to validate a query on all shards instead of a single random shard (#23697)

This is especially useful when we rewrite the query because the result of the rewrite can be very different on different shards. See #18254 for example.
This commit is contained in:
Igor Motov 2017-03-22 17:39:21 -04:00 committed by GitHub
parent e860fe7363
commit f927a2708d
8 changed files with 197 additions and 54 deletions

View File

@ -19,6 +19,7 @@
package org.elasticsearch.action.admin.indices.validate.query;
import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
@ -27,20 +28,26 @@ import java.io.IOException;
public class QueryExplanation implements Streamable {
public static final int RANDOM_SHARD = -1;
private String index;
private int shard = RANDOM_SHARD;
private boolean valid;
private String explanation;
private String error;
QueryExplanation() {
}
public QueryExplanation(String index, boolean valid, String explanation, String error) {
public QueryExplanation(String index, int shard, boolean valid, String explanation,
String error) {
this.index = index;
this.shard = shard;
this.valid = valid;
this.explanation = explanation;
this.error = error;
@ -50,6 +57,10 @@ public class QueryExplanation implements Streamable {
return this.index;
}
public int getShard() {
return this.shard;
}
public boolean isValid() {
return this.valid;
}
@ -65,6 +76,11 @@ public class QueryExplanation implements Streamable {
@Override
public void readFrom(StreamInput in) throws IOException {
index = in.readString();
if (in.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
shard = in.readInt();
} else {
shard = RANDOM_SHARD;
}
valid = in.readBoolean();
explanation = in.readOptionalString();
error = in.readOptionalString();
@ -73,6 +89,9 @@ public class QueryExplanation implements Streamable {
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(index);
if (out.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
out.writeInt(shard);
}
out.writeBoolean(valid);
out.writeOptionalString(explanation);
out.writeOptionalString(error);

View File

@ -89,8 +89,14 @@ public class TransportValidateQueryAction extends TransportBroadcastAction<Valid
@Override
protected GroupShardsIterator shards(ClusterState clusterState, ValidateQueryRequest request, String[] concreteIndices) {
// Hard-code routing to limit request to a single shard, but still, randomize it...
Map<String, Set<String>> routingMap = indexNameExpressionResolver.resolveSearchRouting(clusterState, Integer.toString(Randomness.get().nextInt(1000)), request.indices());
final String routing;
if (request.allShards()) {
routing = null;
} else {
// Random routing to limit request to a single shard
routing = Integer.toString(Randomness.get().nextInt(1000));
}
Map<String, Set<String>> routingMap = indexNameExpressionResolver.resolveSearchRouting(clusterState, routing, request.indices());
return clusterService.operationRouting().searchShards(clusterState, concreteIndices, routingMap, "_local");
}
@ -124,12 +130,13 @@ public class TransportValidateQueryAction extends TransportBroadcastAction<Valid
} else {
ShardValidateQueryResponse validateQueryResponse = (ShardValidateQueryResponse) shardResponse;
valid = valid && validateQueryResponse.isValid();
if (request.explain() || request.rewrite()) {
if (request.explain() || request.rewrite() || request.allShards()) {
if (queryExplanations == null) {
queryExplanations = new ArrayList<>();
}
queryExplanations.add(new QueryExplanation(
validateQueryResponse.getIndex(),
request.allShards() ? validateQueryResponse.getShardId().getId() : QueryExplanation.RANDOM_SHARD,
validateQueryResponse.isValid(),
validateQueryResponse.getExplanation(),
validateQueryResponse.getError()

View File

@ -19,6 +19,7 @@
package org.elasticsearch.action.admin.indices.validate.query;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.ValidateActions;
import org.elasticsearch.action.support.IndicesOptions;
@ -43,6 +44,7 @@ public class ValidateQueryRequest extends BroadcastRequest<ValidateQueryRequest>
private boolean explain;
private boolean rewrite;
private boolean allShards;
private String[] types = Strings.EMPTY_ARRAY;
@ -125,6 +127,20 @@ public class ValidateQueryRequest extends BroadcastRequest<ValidateQueryRequest>
return rewrite;
}
/**
* Indicates whether the query should be validated on all shards instead of one random shard
*/
public void allShards(boolean allShards) {
this.allShards = allShards;
}
/**
* Indicates whether the query should be validated on all shards instead of one random shard
*/
public boolean allShards() {
return allShards;
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
@ -138,6 +154,9 @@ public class ValidateQueryRequest extends BroadcastRequest<ValidateQueryRequest>
}
explain = in.readBoolean();
rewrite = in.readBoolean();
if (in.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
allShards = in.readBoolean();
}
}
@Override
@ -150,11 +169,14 @@ public class ValidateQueryRequest extends BroadcastRequest<ValidateQueryRequest>
}
out.writeBoolean(explain);
out.writeBoolean(rewrite);
if (out.getVersion().onOrAfter(Version.V_5_4_0_UNRELEASED)) {
out.writeBoolean(allShards);
}
}
@Override
public String toString() {
return "[" + Arrays.toString(indices) + "]" + Arrays.toString(types) + ", query[" + query + "], explain:" + explain +
", rewrite:" + rewrite;
", rewrite:" + rewrite + ", all_shards:" + allShards;
}
}

View File

@ -64,4 +64,12 @@ public class ValidateQueryRequestBuilder extends BroadcastOperationRequestBuilde
request.rewrite(rewrite);
return this;
}
/**
* Indicates whether the query should be validated on all shards
*/
public ValidateQueryRequestBuilder setAllShards(boolean rewrite) {
request.allShards(rewrite);
return this;
}
}

View File

@ -62,6 +62,7 @@ public class RestValidateQueryAction extends BaseRestHandler {
validateQueryRequest.explain(request.paramAsBoolean("explain", false));
validateQueryRequest.types(Strings.splitStringByCommaToArray(request.param("type")));
validateQueryRequest.rewrite(request.paramAsBoolean("rewrite", false));
validateQueryRequest.allShards(request.paramAsBoolean("all_shards", false));
Exception bodyParsingException = null;
try {
@ -98,6 +99,9 @@ public class RestValidateQueryAction extends BaseRestHandler {
if (explanation.getIndex() != null) {
builder.field(INDEX_FIELD, explanation.getIndex());
}
if(explanation.getShard() >= 0) {
builder.field(SHARD_FIELD, explanation.getShard());
}
builder.field(VALID_FIELD, explanation.isValid());
if (explanation.getError() != null) {
builder.field(ERROR_FIELD, explanation.getError());
@ -132,6 +136,7 @@ public class RestValidateQueryAction extends BaseRestHandler {
}
private static final String INDEX_FIELD = "index";
private static final String SHARD_FIELD = "shard";
private static final String VALID_FIELD = "valid";
private static final String EXPLANATIONS_FIELD = "explanations";
private static final String ERROR_FIELD = "error";

View File

@ -39,11 +39,14 @@ import org.joda.time.format.ISODateTimeFormat;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
@ -252,6 +255,37 @@ public class SimpleValidateQueryIT extends ESIntegTestCase {
containsString("field:huge field:pidgin"), true);
}
public void testExplainWithRewriteValidateQueryAllShards() throws Exception {
client().admin().indices().prepareCreate("test")
.addMapping("type1", "field", "type=text,analyzer=whitespace")
.setSettings(SETTING_NUMBER_OF_SHARDS, 2).get();
// We are relying on specific routing behaviors for the result to be right, so
// we cannot randomize the number of shards or change ids here.
client().prepareIndex("test", "type1", "1")
.setSource("field", "quick lazy huge brown pidgin").get();
client().prepareIndex("test", "type1", "2")
.setSource("field", "the quick brown fox").get();
client().prepareIndex("test", "type1", "3")
.setSource("field", "the quick lazy huge brown fox jumps over the tree").get();
client().prepareIndex("test", "type1", "4")
.setSource("field", "the lazy dog quacks like a duck").get();
refresh();
// prefix queries
assertExplanations(QueryBuilders.matchPhrasePrefixQuery("field", "qu"),
Arrays.asList(
equalTo("field:quick"),
allOf(containsString("field:quick"), containsString("field:quacks"))
), true, true);
assertExplanations(QueryBuilders.matchPhrasePrefixQuery("field", "ju"),
Arrays.asList(
equalTo("field:jumps"),
equalTo("+MatchNoDocsQuery(\"empty MultiPhraseQuery\") +MatchNoDocsQuery(\"No " +
"terms supplied for org.elasticsearch.common.lucene.search." +
"MultiPhrasePrefixQuery\")")
), true, true);
}
public void testIrrelevantPropertiesBeforeQuery() throws IOException {
createIndex("test");
ensureGreen();
@ -280,4 +314,22 @@ public class SimpleValidateQueryIT extends ESIntegTestCase {
assertThat(response.getQueryExplanation().get(0).getExplanation(), matcher);
assertThat(response.isValid(), equalTo(true));
}
private static void assertExplanations(QueryBuilder queryBuilder,
List<Matcher<String>> matchers, boolean withRewrite,
boolean allShards) {
ValidateQueryResponse response = client().admin().indices().prepareValidateQuery("test")
.setTypes("type1")
.setQuery(queryBuilder)
.setExplain(true)
.setRewrite(withRewrite)
.setAllShards(allShards)
.execute().actionGet();
assertThat(response.getQueryExplanation().size(), equalTo(matchers.size()));
for (int i = 0; i < matchers.size(); i++) {
assertThat(response.getQueryExplanation().get(i).getError(), nullValue());
assertThat(response.getQueryExplanation().get(i).getExplanation(), matchers.get(i));
assertThat(response.isValid(), equalTo(true));
}
}
}

View File

@ -146,47 +146,6 @@ When the query is valid, the explanation defaults to the string
representation of that query. With `rewrite` set to `true`, the explanation
is more detailed showing the actual Lucene query that will be executed.
For Fuzzy Queries:
[source,js]
--------------------------------------------------
GET twitter/tweet/_validate/query?rewrite=true
{
"query": {
"match": {
"user": {
"query": "kimchy",
"fuzziness": "auto"
}
}
}
}
--------------------------------------------------
// CONSOLE
// TEST[skip:https://github.com/elastic/elasticsearch/issues/18254]
Response:
[source,js]
--------------------------------------------------
{
"valid": true,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"explanations": [
{
"index": "twitter",
"valid": true,
"explanation": "+user:kimchy +user:kimchi^0.75 #(ConstantScore(_type:tweet))^0.0"
}
]
}
--------------------------------------------------
// TESTRESPONSE
For More Like This:
[source,js]
@ -204,7 +163,7 @@ GET twitter/tweet/_validate/query?rewrite=true
}
--------------------------------------------------
// CONSOLE
// TEST[skip:https://github.com/elastic/elasticsearch/issues/18254]
// TEST[skip:the output is randomized depending on which shard we hit]
Response:
@ -228,6 +187,73 @@ Response:
--------------------------------------------------
// TESTRESPONSE
CAUTION: The request is executed on a single shard only, which is randomly
By default, the request is executed on a single shard only, which is randomly
selected. The detailed explanation of the query may depend on which shard is
being hit, and therefore may vary from one request to another.
being hit, and therefore may vary from one request to another. So, in case of
query rewrite the `all_shards` parameter should be used to get response from
all available shards.
For Fuzzy Queries:
[source,js]
--------------------------------------------------
GET twitter/tweet/_validate/query?rewrite=true&all_shards=true
{
"query": {
"match": {
"user": {
"query": "kimchy",
"fuzziness": "auto"
}
}
}
}
--------------------------------------------------
// CONSOLE
Response:
[source,js]
--------------------------------------------------
{
"valid": true,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"explanations": [
{
"index": "twitter",
"shard": 0,
"valid": true,
"explanation": "+MatchNoDocsQuery(\"empty BooleanQuery\") #ConstantScore(MatchNoDocsQuery(\"empty BooleanQuery\"))"
},
{
"index": "twitter",
"shard": 1,
"valid": true,
"explanation": "+MatchNoDocsQuery(\"empty BooleanQuery\") #ConstantScore(MatchNoDocsQuery(\"empty BooleanQuery\"))"
},
{
"index": "twitter",
"shard": 2,
"valid": true,
"explanation": "(user:kimchi)^0.8333333"
},
{
"index": "twitter",
"shard": 3,
"valid": true,
"explanation": "user:kimchy"
},
{
"index": "twitter",
"shard": 4,
"valid": true,
"explanation": "+MatchNoDocsQuery(\"empty BooleanQuery\") #ConstantScore(MatchNoDocsQuery(\"empty BooleanQuery\"))"
}
]
}
--------------------------------------------------
// TESTRESPONSE

View File

@ -66,6 +66,10 @@
"rewrite": {
"type": "boolean",
"description": "Provide a more detailed explanation showing the actual Lucene query that will be executed."
},
"all_shards": {
"type": "boolean",
"description": "Execute validation on all shards instead of one random shard per index"
}
}
},