Optimize query with types filter in the URL (t/t/_search) (#20979)

This change adds a TypesQuery that checks if the disjunction of types should be rewritten to a MatchAllDocs query. The check is done only if the number of terms is below a threshold (16 by default and configurable via max_boolean_clause).
This commit is contained in:
Jim Ferenczi 2016-10-20 12:33:32 +02:00 committed by GitHub
parent 7ef4076160
commit d0bbe89c16
7 changed files with 101 additions and 43 deletions

View File

@ -25,6 +25,9 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
@ -41,6 +44,7 @@ import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
import org.elasticsearch.index.query.QueryShardContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@ -144,7 +148,7 @@ public class TypeFieldMapper extends MetadataFieldMapper {
if (indexOptions() == IndexOptions.NONE) {
throw new AssertionError();
}
return new TypeQuery(indexedValueForSearch(value));
return new TypesQuery(indexedValueForSearch(value));
}
@Override
@ -161,26 +165,52 @@ public class TypeFieldMapper extends MetadataFieldMapper {
}
}
public static class TypeQuery extends Query {
/**
* Specialization for a disjunction over many _type
*/
public static class TypesQuery extends Query {
// Same threshold as TermsQuery
private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
private final BytesRef type;
private final BytesRef[] types;
public TypeQuery(BytesRef type) {
this.type = Objects.requireNonNull(type);
public TypesQuery(BytesRef... types) {
if (types == null) {
throw new NullPointerException("types cannot be null.");
}
if (types.length == 0) {
throw new IllegalArgumentException("types must contains at least one value.");
}
this.types = types;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Term term = new Term(CONTENT_TYPE, type);
TermContext context = TermContext.build(reader.getContext(), term);
if (context.docFreq() == reader.maxDoc()) {
// All docs have the same type.
// Using a match_all query will help Lucene perform some optimizations
// For instance, match_all queries as filter clauses are automatically removed
return new MatchAllDocsQuery();
} else {
return new ConstantScoreQuery(new TermQuery(term, context));
final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
if (types.length <= threshold) {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
int totalDocFreq = 0;
for (BytesRef type : types) {
Term term = new Term(CONTENT_TYPE, type);
TermContext context = TermContext.build(reader.getContext(), term);
if (context.docFreq() == 0) {
// this _type is not present in the reader
continue;
}
totalDocFreq += context.docFreq();
// strict equality should be enough ?
if (totalDocFreq >= reader.maxDoc()) {
assert totalDocFreq == reader.maxDoc();
// Matches all docs since _type is a single value field
// Using a match_all query will help Lucene perform some optimizations
// For instance, match_all queries as filter clauses are automatically removed
return new MatchAllDocsQuery();
}
bq.add(new TermQuery(term, context), BooleanClause.Occur.SHOULD);
}
return new ConstantScoreQuery(bq.build());
}
return new TermsQuery(CONTENT_TYPE, types);
}
@Override
@ -188,20 +218,26 @@ public class TypeFieldMapper extends MetadataFieldMapper {
if (sameClassAs(obj) == false) {
return false;
}
TypeQuery that = (TypeQuery) obj;
return type.equals(that.type);
TypesQuery that = (TypesQuery) obj;
return Arrays.equals(types, that.types);
}
@Override
public int hashCode() {
return 31 * classHash() + type.hashCode();
return 31 * classHash() + Arrays.hashCode(types);
}
@Override
public String toString(String field) {
return "_type:" + type;
StringBuilder builder = new StringBuilder();
for (BytesRef type : types) {
if (builder.length() > 0) {
builder.append(' ');
}
builder.append(new Term(CONTENT_TYPE, type).toString());
}
return builder.toString();
}
}
private TypeFieldMapper(Settings indexSettings, MappedFieldType existing) {

View File

@ -19,7 +19,6 @@
package org.elasticsearch.search;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
@ -65,7 +64,6 @@ import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.ScrollContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.ShardSearchRequest;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.profile.Profilers;
import org.elasticsearch.search.query.QueryPhaseExecutionException;
import org.elasticsearch.search.query.QuerySearchResult;
@ -295,7 +293,7 @@ final class DefaultSearchContext extends SearchContext {
for (int i = 0; i < typesBytes.length; i++) {
typesBytes[i] = new BytesRef(types[i]);
}
typesFilter = new TermsQuery(TypeFieldMapper.NAME, typesBytes);
typesFilter = new TypeFieldMapper.TypesQuery(typesBytes);
}
if (typesFilter == null && aliasFilter == null && hasNestedFields == false) {

View File

@ -34,11 +34,12 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TypeFieldMapper;
import org.junit.Before;
import java.io.IOException;
public class TypeFieldTypeTests extends FieldTypeTestCase {
@Override
protected MappedFieldType createDefaultFieldType() {
@ -56,20 +57,14 @@ public class TypeFieldTypeTests extends FieldTypeTestCase {
});
}
public void testTermQuery() throws Exception {
public void testTermsQuery() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
StringField type = new StringField(TypeFieldMapper.NAME, "my_type", Store.NO);
doc.add(type);
w.addDocument(doc);
w.addDocument(doc);
IndexReader reader = DirectoryReader.open(w);
IndexReader reader = openReaderWithNewType("my_type", w);
TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType();
ft.setName(TypeFieldMapper.NAME);
Query query = ft.termQuery("my_type", null);
assertEquals(new MatchAllDocsQuery(), query.rewrite(reader));
// Make sure that Lucene actually simplifies the query when there is a single type
@ -78,13 +73,44 @@ public class TypeFieldTypeTests extends FieldTypeTestCase {
Query rewritten = new IndexSearcher(reader).rewrite(filteredQuery);
assertEquals(userQuery, rewritten);
type.setStringValue("my_type2");
w.addDocument(doc);
// ... and does not rewrite it if there is more than one type
reader.close();
reader = DirectoryReader.open(w);
reader = openReaderWithNewType("my_type2", w);
Query expected = new ConstantScoreQuery(
new BooleanQuery.Builder()
.add(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type")), Occur.SHOULD)
.build()
);
assertEquals(expected, query.rewrite(reader));
assertEquals(new ConstantScoreQuery(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type"))), query.rewrite(reader));
BytesRef[] types =
new BytesRef[] {new BytesRef("my_type"), new BytesRef("my_type2"), new BytesRef("my_type3")};
// the query should match all documents
query = new TypeFieldMapper.TypesQuery(types);
assertEquals(new MatchAllDocsQuery(), query.rewrite(reader));
reader.close();
reader = openReaderWithNewType("unknown_type", w);
// the query cannot rewrite to a match all docs sinc unknown_type is not queried.
query = new TypeFieldMapper.TypesQuery(types);
expected =
new ConstantScoreQuery(
new BooleanQuery.Builder()
.add(new TermQuery(new Term(TypeFieldMapper.CONTENT_TYPE, types[0])), Occur.SHOULD)
.add(new TermQuery(new Term(TypeFieldMapper.CONTENT_TYPE, types[1])), Occur.SHOULD)
.build()
);
rewritten = query.rewrite(reader);
assertEquals(expected, rewritten);
IOUtils.close(reader, w, dir);
}
static DirectoryReader openReaderWithNewType(String type, IndexWriter writer) throws IOException {
Document doc = new Document();
StringField typeField = new StringField(TypeFieldMapper.NAME, type, Store.NO);
doc.add(typeField);
writer.addDocument(doc);
return DirectoryReader.open(writer);
}
}

View File

@ -280,7 +280,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase<HasChildQue
assertThat(termQuery.getTerm().bytes(), equalTo(ids[0]));
//check the type filter
assertThat(booleanQuery.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.FILTER));
assertEquals(new TypeFieldMapper.TypeQuery(new BytesRef(type)), booleanQuery.clauses().get(1).getQuery());
assertEquals(new TypeFieldMapper.TypesQuery(new BytesRef(type)), booleanQuery.clauses().get(1).getQuery());
}
@Override

View File

@ -40,7 +40,7 @@ public class TypeQueryBuilderTests extends AbstractQueryTestCase<TypeQueryBuilde
if (createShardContext().getMapperService().documentMapper(queryBuilder.type()) == null) {
assertEquals(new MatchNoDocsQuery(), query);
} else {
assertEquals(new TypeFieldMapper.TypeQuery(new BytesRef(queryBuilder.type())), query);
assertEquals(new TypeFieldMapper.TypesQuery(new BytesRef(queryBuilder.type())), query);
}
}

View File

@ -19,14 +19,12 @@
package org.elasticsearch.search;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.TypeFieldMapper;
import org.elasticsearch.search.DefaultSearchContext;
import org.elasticsearch.test.ESTestCase;
import static org.apache.lucene.search.BooleanClause.Occur.FILTER;
@ -38,13 +36,13 @@ public class DefaultSearchContextTests extends ESTestCase {
public void testCreateSearchFilter() {
Query searchFilter = DefaultSearchContext.createSearchFilter(new String[]{"type1", "type2"}, null, randomBoolean());
Query expectedQuery = new BooleanQuery.Builder()
.add(new TermsQuery(TypeFieldMapper.NAME, new BytesRef("type1"), new BytesRef("type2")), FILTER)
.add(new TypeFieldMapper.TypesQuery(new BytesRef("type1"), new BytesRef("type2")), FILTER)
.build();
assertThat(searchFilter, equalTo(expectedQuery));
searchFilter = DefaultSearchContext.createSearchFilter(new String[]{"type1", "type2"}, new MatchAllDocsQuery(), randomBoolean());
expectedQuery = new BooleanQuery.Builder()
.add(new TermsQuery(TypeFieldMapper.NAME, new BytesRef("type1"), new BytesRef("type2")), FILTER)
.add(new TypeFieldMapper.TypesQuery(new BytesRef("type1"), new BytesRef("type2")), FILTER)
.add(new MatchAllDocsQuery(), FILTER)
.build();
assertThat(searchFilter, equalTo(expectedQuery));

View File

@ -68,7 +68,7 @@ This will yield the following result:
"details" : [ ]
}, {
"value" : 1.0,
"description" : "_type:tweet, product of:",
"description" : "*:*, product of:",
"details" : [
{ "value" : 1.0, "description" : "boost", "details" : [ ] },
{ "value" : 1.0, "description" : "queryNorm", "details" : [ ] }