From d0bbe89c16db4acd4a644fbe5cc2c9e8fe4016d7 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 20 Oct 2016 12:33:32 +0200 Subject: [PATCH] Optimize query with types filter in the URL (t/t/_search) (#20979) This change adds a TypesQuery that checks if the disjunction of types should be rewritten to a MatchAllDocs query. The check is done only if the number of terms is below a threshold (16 by default and configurable via max_boolean_clause). --- .../index/mapper/TypeFieldMapper.java | 74 ++++++++++++++----- .../search/DefaultSearchContext.java | 4 +- .../index/mapper/TypeFieldTypeTests.java | 54 ++++++++++---- .../query/HasChildQueryBuilderTests.java | 2 +- .../index/query/TypeQueryBuilderTests.java | 2 +- .../search/DefaultSearchContextTests.java | 6 +- docs/reference/search/explain.asciidoc | 2 +- 7 files changed, 101 insertions(+), 43 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java index ac9aeedb365..399f4613c47 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java @@ -25,6 +25,9 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; +import org.apache.lucene.queries.TermsQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; @@ -41,6 +44,7 @@ import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Objects; @@ -144,7 +148,7 @@ public class TypeFieldMapper extends MetadataFieldMapper { if (indexOptions() == IndexOptions.NONE) { throw new AssertionError(); } - return new TypeQuery(indexedValueForSearch(value)); + return new TypesQuery(indexedValueForSearch(value)); } @Override @@ -161,26 +165,52 @@ public class TypeFieldMapper extends MetadataFieldMapper { } } - public static class TypeQuery extends Query { + /** + * Specialization for a disjunction over many _type + */ + public static class TypesQuery extends Query { + // Same threshold as TermsQuery + private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16; - private final BytesRef type; + private final BytesRef[] types; - public TypeQuery(BytesRef type) { - this.type = Objects.requireNonNull(type); + public TypesQuery(BytesRef... types) { + if (types == null) { + throw new NullPointerException("types cannot be null."); + } + if (types.length == 0) { + throw new IllegalArgumentException("types must contains at least one value."); + } + this.types = types; } @Override public Query rewrite(IndexReader reader) throws IOException { - Term term = new Term(CONTENT_TYPE, type); - TermContext context = TermContext.build(reader.getContext(), term); - if (context.docFreq() == reader.maxDoc()) { - // All docs have the same type. - // Using a match_all query will help Lucene perform some optimizations - // For instance, match_all queries as filter clauses are automatically removed - return new MatchAllDocsQuery(); - } else { - return new ConstantScoreQuery(new TermQuery(term, context)); + final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount()); + if (types.length <= threshold) { + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + int totalDocFreq = 0; + for (BytesRef type : types) { + Term term = new Term(CONTENT_TYPE, type); + TermContext context = TermContext.build(reader.getContext(), term); + if (context.docFreq() == 0) { + // this _type is not present in the reader + continue; + } + totalDocFreq += context.docFreq(); + // strict equality should be enough ? + if (totalDocFreq >= reader.maxDoc()) { + assert totalDocFreq == reader.maxDoc(); + // Matches all docs since _type is a single value field + // Using a match_all query will help Lucene perform some optimizations + // For instance, match_all queries as filter clauses are automatically removed + return new MatchAllDocsQuery(); + } + bq.add(new TermQuery(term, context), BooleanClause.Occur.SHOULD); + } + return new ConstantScoreQuery(bq.build()); } + return new TermsQuery(CONTENT_TYPE, types); } @Override @@ -188,20 +218,26 @@ public class TypeFieldMapper extends MetadataFieldMapper { if (sameClassAs(obj) == false) { return false; } - TypeQuery that = (TypeQuery) obj; - return type.equals(that.type); + TypesQuery that = (TypesQuery) obj; + return Arrays.equals(types, that.types); } @Override public int hashCode() { - return 31 * classHash() + type.hashCode(); + return 31 * classHash() + Arrays.hashCode(types); } @Override public String toString(String field) { - return "_type:" + type; + StringBuilder builder = new StringBuilder(); + for (BytesRef type : types) { + if (builder.length() > 0) { + builder.append(' '); + } + builder.append(new Term(CONTENT_TYPE, type).toString()); + } + return builder.toString(); } - } private TypeFieldMapper(Settings indexSettings, MappedFieldType existing) { diff --git a/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java b/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java index a1c140639c1..1e36522794a 100644 --- a/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java @@ -19,7 +19,6 @@ package org.elasticsearch.search; -import org.apache.lucene.queries.TermsQuery; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; @@ -65,7 +64,6 @@ import org.elasticsearch.search.internal.ContextIndexSearcher; import org.elasticsearch.search.internal.ScrollContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.ShardSearchRequest; -import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.profile.Profilers; import org.elasticsearch.search.query.QueryPhaseExecutionException; import org.elasticsearch.search.query.QuerySearchResult; @@ -295,7 +293,7 @@ final class DefaultSearchContext extends SearchContext { for (int i = 0; i < typesBytes.length; i++) { typesBytes[i] = new BytesRef(types[i]); } - typesFilter = new TermsQuery(TypeFieldMapper.NAME, typesBytes); + typesFilter = new TypeFieldMapper.TypesQuery(typesBytes); } if (typesFilter == null && aliasFilter == null && hasNestedFields == false) { diff --git a/core/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java b/core/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java index 54217ea94ae..b7782e355c1 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/TypeFieldTypeTests.java @@ -34,11 +34,12 @@ import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.TypeFieldMapper; import org.junit.Before; +import java.io.IOException; + public class TypeFieldTypeTests extends FieldTypeTestCase { @Override protected MappedFieldType createDefaultFieldType() { @@ -56,20 +57,14 @@ public class TypeFieldTypeTests extends FieldTypeTestCase { }); } - public void testTermQuery() throws Exception { + public void testTermsQuery() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); - Document doc = new Document(); - StringField type = new StringField(TypeFieldMapper.NAME, "my_type", Store.NO); - doc.add(type); - w.addDocument(doc); - w.addDocument(doc); - IndexReader reader = DirectoryReader.open(w); + IndexReader reader = openReaderWithNewType("my_type", w); TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType(); ft.setName(TypeFieldMapper.NAME); Query query = ft.termQuery("my_type", null); - assertEquals(new MatchAllDocsQuery(), query.rewrite(reader)); // Make sure that Lucene actually simplifies the query when there is a single type @@ -78,13 +73,44 @@ public class TypeFieldTypeTests extends FieldTypeTestCase { Query rewritten = new IndexSearcher(reader).rewrite(filteredQuery); assertEquals(userQuery, rewritten); - type.setStringValue("my_type2"); - w.addDocument(doc); + // ... and does not rewrite it if there is more than one type reader.close(); - reader = DirectoryReader.open(w); + reader = openReaderWithNewType("my_type2", w); + Query expected = new ConstantScoreQuery( + new BooleanQuery.Builder() + .add(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type")), Occur.SHOULD) + .build() + ); + assertEquals(expected, query.rewrite(reader)); - assertEquals(new ConstantScoreQuery(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type"))), query.rewrite(reader)); + BytesRef[] types = + new BytesRef[] {new BytesRef("my_type"), new BytesRef("my_type2"), new BytesRef("my_type3")}; + // the query should match all documents + query = new TypeFieldMapper.TypesQuery(types); + assertEquals(new MatchAllDocsQuery(), query.rewrite(reader)); + + reader.close(); + reader = openReaderWithNewType("unknown_type", w); + // the query cannot rewrite to a match all docs sinc unknown_type is not queried. + query = new TypeFieldMapper.TypesQuery(types); + expected = + new ConstantScoreQuery( + new BooleanQuery.Builder() + .add(new TermQuery(new Term(TypeFieldMapper.CONTENT_TYPE, types[0])), Occur.SHOULD) + .add(new TermQuery(new Term(TypeFieldMapper.CONTENT_TYPE, types[1])), Occur.SHOULD) + .build() + ); + rewritten = query.rewrite(reader); + assertEquals(expected, rewritten); IOUtils.close(reader, w, dir); } + + static DirectoryReader openReaderWithNewType(String type, IndexWriter writer) throws IOException { + Document doc = new Document(); + StringField typeField = new StringField(TypeFieldMapper.NAME, type, Store.NO); + doc.add(typeField); + writer.addDocument(doc); + return DirectoryReader.open(writer); + } } diff --git a/core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java index 00c5f4764a9..e491f64457d 100644 --- a/core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java @@ -280,7 +280,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase