Optimize query with types filter in the URL (t/t/_search) (#20979)
This change adds a TypesQuery that checks if the disjunction of types should be rewritten to a MatchAllDocs query. The check is done only if the number of terms is below a threshold (16 by default and configurable via max_boolean_clause).
This commit is contained in:
parent
7ef4076160
commit
d0bbe89c16
|
@ -25,6 +25,9 @@ import org.apache.lucene.index.IndexOptions;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.queries.TermsQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -41,6 +44,7 @@ import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
|
|||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
@ -144,7 +148,7 @@ public class TypeFieldMapper extends MetadataFieldMapper {
|
|||
if (indexOptions() == IndexOptions.NONE) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
return new TypeQuery(indexedValueForSearch(value));
|
||||
return new TypesQuery(indexedValueForSearch(value));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -161,26 +165,52 @@ public class TypeFieldMapper extends MetadataFieldMapper {
|
|||
}
|
||||
}
|
||||
|
||||
public static class TypeQuery extends Query {
|
||||
/**
|
||||
* Specialization for a disjunction over many _type
|
||||
*/
|
||||
public static class TypesQuery extends Query {
|
||||
// Same threshold as TermsQuery
|
||||
private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
|
||||
|
||||
private final BytesRef type;
|
||||
private final BytesRef[] types;
|
||||
|
||||
public TypeQuery(BytesRef type) {
|
||||
this.type = Objects.requireNonNull(type);
|
||||
public TypesQuery(BytesRef... types) {
|
||||
if (types == null) {
|
||||
throw new NullPointerException("types cannot be null.");
|
||||
}
|
||||
if (types.length == 0) {
|
||||
throw new IllegalArgumentException("types must contains at least one value.");
|
||||
}
|
||||
this.types = types;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
Term term = new Term(CONTENT_TYPE, type);
|
||||
TermContext context = TermContext.build(reader.getContext(), term);
|
||||
if (context.docFreq() == reader.maxDoc()) {
|
||||
// All docs have the same type.
|
||||
// Using a match_all query will help Lucene perform some optimizations
|
||||
// For instance, match_all queries as filter clauses are automatically removed
|
||||
return new MatchAllDocsQuery();
|
||||
} else {
|
||||
return new ConstantScoreQuery(new TermQuery(term, context));
|
||||
final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
|
||||
if (types.length <= threshold) {
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
int totalDocFreq = 0;
|
||||
for (BytesRef type : types) {
|
||||
Term term = new Term(CONTENT_TYPE, type);
|
||||
TermContext context = TermContext.build(reader.getContext(), term);
|
||||
if (context.docFreq() == 0) {
|
||||
// this _type is not present in the reader
|
||||
continue;
|
||||
}
|
||||
totalDocFreq += context.docFreq();
|
||||
// strict equality should be enough ?
|
||||
if (totalDocFreq >= reader.maxDoc()) {
|
||||
assert totalDocFreq == reader.maxDoc();
|
||||
// Matches all docs since _type is a single value field
|
||||
// Using a match_all query will help Lucene perform some optimizations
|
||||
// For instance, match_all queries as filter clauses are automatically removed
|
||||
return new MatchAllDocsQuery();
|
||||
}
|
||||
bq.add(new TermQuery(term, context), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return new ConstantScoreQuery(bq.build());
|
||||
}
|
||||
return new TermsQuery(CONTENT_TYPE, types);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -188,20 +218,26 @@ public class TypeFieldMapper extends MetadataFieldMapper {
|
|||
if (sameClassAs(obj) == false) {
|
||||
return false;
|
||||
}
|
||||
TypeQuery that = (TypeQuery) obj;
|
||||
return type.equals(that.type);
|
||||
TypesQuery that = (TypesQuery) obj;
|
||||
return Arrays.equals(types, that.types);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 31 * classHash() + type.hashCode();
|
||||
return 31 * classHash() + Arrays.hashCode(types);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "_type:" + type;
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (BytesRef type : types) {
|
||||
if (builder.length() > 0) {
|
||||
builder.append(' ');
|
||||
}
|
||||
builder.append(new Term(CONTENT_TYPE, type).toString());
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private TypeFieldMapper(Settings indexSettings, MappedFieldType existing) {
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
package org.elasticsearch.search;
|
||||
|
||||
import org.apache.lucene.queries.TermsQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Collector;
|
||||
|
@ -65,7 +64,6 @@ import org.elasticsearch.search.internal.ContextIndexSearcher;
|
|||
import org.elasticsearch.search.internal.ScrollContext;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.search.internal.ShardSearchRequest;
|
||||
import org.elasticsearch.search.lookup.SearchLookup;
|
||||
import org.elasticsearch.search.profile.Profilers;
|
||||
import org.elasticsearch.search.query.QueryPhaseExecutionException;
|
||||
import org.elasticsearch.search.query.QuerySearchResult;
|
||||
|
@ -295,7 +293,7 @@ final class DefaultSearchContext extends SearchContext {
|
|||
for (int i = 0; i < typesBytes.length; i++) {
|
||||
typesBytes[i] = new BytesRef(types[i]);
|
||||
}
|
||||
typesFilter = new TermsQuery(TypeFieldMapper.NAME, typesBytes);
|
||||
typesFilter = new TypeFieldMapper.TypesQuery(typesBytes);
|
||||
}
|
||||
|
||||
if (typesFilter == null && aliasFilter == null && hasNestedFields == false) {
|
||||
|
|
|
@ -34,11 +34,12 @@ import org.apache.lucene.search.PhraseQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.TypeFieldMapper;
|
||||
import org.junit.Before;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class TypeFieldTypeTests extends FieldTypeTestCase {
|
||||
@Override
|
||||
protected MappedFieldType createDefaultFieldType() {
|
||||
|
@ -56,20 +57,14 @@ public class TypeFieldTypeTests extends FieldTypeTestCase {
|
|||
});
|
||||
}
|
||||
|
||||
public void testTermQuery() throws Exception {
|
||||
public void testTermsQuery() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||
Document doc = new Document();
|
||||
StringField type = new StringField(TypeFieldMapper.NAME, "my_type", Store.NO);
|
||||
doc.add(type);
|
||||
w.addDocument(doc);
|
||||
w.addDocument(doc);
|
||||
IndexReader reader = DirectoryReader.open(w);
|
||||
IndexReader reader = openReaderWithNewType("my_type", w);
|
||||
|
||||
TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType();
|
||||
ft.setName(TypeFieldMapper.NAME);
|
||||
Query query = ft.termQuery("my_type", null);
|
||||
|
||||
assertEquals(new MatchAllDocsQuery(), query.rewrite(reader));
|
||||
|
||||
// Make sure that Lucene actually simplifies the query when there is a single type
|
||||
|
@ -78,13 +73,44 @@ public class TypeFieldTypeTests extends FieldTypeTestCase {
|
|||
Query rewritten = new IndexSearcher(reader).rewrite(filteredQuery);
|
||||
assertEquals(userQuery, rewritten);
|
||||
|
||||
type.setStringValue("my_type2");
|
||||
w.addDocument(doc);
|
||||
// ... and does not rewrite it if there is more than one type
|
||||
reader.close();
|
||||
reader = DirectoryReader.open(w);
|
||||
reader = openReaderWithNewType("my_type2", w);
|
||||
Query expected = new ConstantScoreQuery(
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type")), Occur.SHOULD)
|
||||
.build()
|
||||
);
|
||||
assertEquals(expected, query.rewrite(reader));
|
||||
|
||||
assertEquals(new ConstantScoreQuery(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type"))), query.rewrite(reader));
|
||||
BytesRef[] types =
|
||||
new BytesRef[] {new BytesRef("my_type"), new BytesRef("my_type2"), new BytesRef("my_type3")};
|
||||
// the query should match all documents
|
||||
query = new TypeFieldMapper.TypesQuery(types);
|
||||
assertEquals(new MatchAllDocsQuery(), query.rewrite(reader));
|
||||
|
||||
reader.close();
|
||||
reader = openReaderWithNewType("unknown_type", w);
|
||||
// the query cannot rewrite to a match all docs sinc unknown_type is not queried.
|
||||
query = new TypeFieldMapper.TypesQuery(types);
|
||||
expected =
|
||||
new ConstantScoreQuery(
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term(TypeFieldMapper.CONTENT_TYPE, types[0])), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term(TypeFieldMapper.CONTENT_TYPE, types[1])), Occur.SHOULD)
|
||||
.build()
|
||||
);
|
||||
rewritten = query.rewrite(reader);
|
||||
assertEquals(expected, rewritten);
|
||||
|
||||
IOUtils.close(reader, w, dir);
|
||||
}
|
||||
|
||||
static DirectoryReader openReaderWithNewType(String type, IndexWriter writer) throws IOException {
|
||||
Document doc = new Document();
|
||||
StringField typeField = new StringField(TypeFieldMapper.NAME, type, Store.NO);
|
||||
doc.add(typeField);
|
||||
writer.addDocument(doc);
|
||||
return DirectoryReader.open(writer);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -280,7 +280,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase<HasChildQue
|
|||
assertThat(termQuery.getTerm().bytes(), equalTo(ids[0]));
|
||||
//check the type filter
|
||||
assertThat(booleanQuery.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.FILTER));
|
||||
assertEquals(new TypeFieldMapper.TypeQuery(new BytesRef(type)), booleanQuery.clauses().get(1).getQuery());
|
||||
assertEquals(new TypeFieldMapper.TypesQuery(new BytesRef(type)), booleanQuery.clauses().get(1).getQuery());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -40,7 +40,7 @@ public class TypeQueryBuilderTests extends AbstractQueryTestCase<TypeQueryBuilde
|
|||
if (createShardContext().getMapperService().documentMapper(queryBuilder.type()) == null) {
|
||||
assertEquals(new MatchNoDocsQuery(), query);
|
||||
} else {
|
||||
assertEquals(new TypeFieldMapper.TypeQuery(new BytesRef(queryBuilder.type())), query);
|
||||
assertEquals(new TypeFieldMapper.TypesQuery(new BytesRef(queryBuilder.type())), query);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,14 +19,12 @@
|
|||
|
||||
package org.elasticsearch.search;
|
||||
|
||||
import org.apache.lucene.queries.TermsQuery;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
import org.elasticsearch.index.mapper.TypeFieldMapper;
|
||||
import org.elasticsearch.search.DefaultSearchContext;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import static org.apache.lucene.search.BooleanClause.Occur.FILTER;
|
||||
|
@ -38,13 +36,13 @@ public class DefaultSearchContextTests extends ESTestCase {
|
|||
public void testCreateSearchFilter() {
|
||||
Query searchFilter = DefaultSearchContext.createSearchFilter(new String[]{"type1", "type2"}, null, randomBoolean());
|
||||
Query expectedQuery = new BooleanQuery.Builder()
|
||||
.add(new TermsQuery(TypeFieldMapper.NAME, new BytesRef("type1"), new BytesRef("type2")), FILTER)
|
||||
.add(new TypeFieldMapper.TypesQuery(new BytesRef("type1"), new BytesRef("type2")), FILTER)
|
||||
.build();
|
||||
assertThat(searchFilter, equalTo(expectedQuery));
|
||||
|
||||
searchFilter = DefaultSearchContext.createSearchFilter(new String[]{"type1", "type2"}, new MatchAllDocsQuery(), randomBoolean());
|
||||
expectedQuery = new BooleanQuery.Builder()
|
||||
.add(new TermsQuery(TypeFieldMapper.NAME, new BytesRef("type1"), new BytesRef("type2")), FILTER)
|
||||
.add(new TypeFieldMapper.TypesQuery(new BytesRef("type1"), new BytesRef("type2")), FILTER)
|
||||
.add(new MatchAllDocsQuery(), FILTER)
|
||||
.build();
|
||||
assertThat(searchFilter, equalTo(expectedQuery));
|
||||
|
|
|
@ -68,7 +68,7 @@ This will yield the following result:
|
|||
"details" : [ ]
|
||||
}, {
|
||||
"value" : 1.0,
|
||||
"description" : "_type:tweet, product of:",
|
||||
"description" : "*:*, product of:",
|
||||
"details" : [
|
||||
{ "value" : 1.0, "description" : "boost", "details" : [ ] },
|
||||
{ "value" : 1.0, "description" : "queryNorm", "details" : [ ] }
|
||||
|
|
Loading…
Reference in New Issue