From 4bd27bc2a0ec98c04f8087c93fb93d313da89df4 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 25 Mar 2016 19:15:29 +0100 Subject: [PATCH] Type filters should not have a performance impact when there is a single type. #17350 Today, if you call /index/type/_search instead of /index/_search, elasticsearch will automatically insert a type filter to only match documents of the given type. This commit uses a new TypeQuery instead of a TermQuery for this filter, which rewrites to a MatchAllDocsQuery when all documents of a shard match the filtered type. This is helpful because BooleanQuery has a special rewrite rule to remove MatchAllDocsQuery as FILTER clauses. So for instance if your query is `+body:"quick fox" #_type:my_type`, it will be rewritten to `+body:"quick fox" #*:*` which is then rewritten to `body:"quick fox"`. --- .../mapper/internal/TypeFieldMapper.java | 54 +++++++++++++++++-- .../index/query/TypeQueryBuilder.java | 8 +-- .../mapper/internal/TypeFieldTypeTests.java | 49 +++++++++++++++++ .../query/HasChildQueryBuilderTests.java | 7 +-- .../index/query/TypeQueryBuilderTests.java | 16 +----- 5 files changed, 105 insertions(+), 29 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java index b9e3434fc25..e2b690cacad 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java @@ -22,15 +22,16 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.Nullable; -import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -39,12 +40,12 @@ import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MetadataFieldMapper; import org.elasticsearch.index.mapper.ParseContext; -import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Objects; /** * @@ -133,12 +134,55 @@ public class TypeFieldMapper extends MetadataFieldMapper { @Override public Query termQuery(Object value, @Nullable QueryShardContext context) { if (indexOptions() == IndexOptions.NONE) { - return new ConstantScoreQuery(new PrefixQuery(new Term(UidFieldMapper.NAME, Uid.typePrefixAsBytes(BytesRefs.toBytesRef(value))))); + throw new AssertionError(); } - return new ConstantScoreQuery(new TermQuery(createTerm(value))); + return new TypeQuery(indexedValueForSearch(value)); } } + public static class TypeQuery extends Query { + + private final BytesRef type; + + public TypeQuery(BytesRef type) { + this.type = Objects.requireNonNull(type); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Term term = new Term(CONTENT_TYPE, type); + TermContext context = TermContext.build(reader.getContext(), term); + if (context.docFreq() == reader.maxDoc()) { + // All docs have the same type. + // Using a match_all query will help Lucene perform some optimizations + // For instance, match_all queries as filter clauses are automatically removed + return new MatchAllDocsQuery(); + } else { + return new ConstantScoreQuery(new TermQuery(term, context)); + } + } + + @Override + public boolean equals(Object obj) { + if (super.equals(obj) == false) { + return false; + } + TypeQuery that = (TypeQuery) obj; + return type.equals(that.type); + } + + @Override + public int hashCode() { + return 31 * super.hashCode() + type.hashCode(); + } + + @Override + public String toString(String field) { + return "_type:" + type; + } + + } + private TypeFieldMapper(Settings indexSettings, MappedFieldType existing) { this(existing == null ? defaultFieldType(indexSettings) : existing.clone(), indexSettings); diff --git a/core/src/main/java/org/elasticsearch/index/query/TypeQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/TypeQueryBuilder.java index 975736e842a..12fd0e1a9b7 100644 --- a/core/src/main/java/org/elasticsearch/index/query/TypeQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/TypeQueryBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.index.Term; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; @@ -74,15 +75,14 @@ public class TypeQueryBuilder extends AbstractQueryBuilder { @Override protected Query doToQuery(QueryShardContext context) throws IOException { - Query filter; //LUCENE 4 UPGRADE document mapper should use bytesref as well? DocumentMapper documentMapper = context.getMapperService().documentMapper(type.utf8ToString()); if (documentMapper == null) { - filter = new TermQuery(new Term(TypeFieldMapper.NAME, type)); + // no type means no documents + return new MatchNoDocsQuery(); } else { - filter = documentMapper.typeFilter(); + return documentMapper.typeFilter(); } - return filter; } @Override diff --git a/core/src/test/java/org/elasticsearch/index/mapper/internal/TypeFieldTypeTests.java b/core/src/test/java/org/elasticsearch/index/mapper/internal/TypeFieldTypeTests.java index c01b04584eb..91216983b70 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/internal/TypeFieldTypeTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/internal/TypeFieldTypeTests.java @@ -18,6 +18,23 @@ */ package org.elasticsearch.index.mapper.internal; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; @@ -26,4 +43,36 @@ public class TypeFieldTypeTests extends FieldTypeTestCase { protected MappedFieldType createDefaultFieldType() { return new TypeFieldMapper.TypeFieldType(); } + + public void testTermQuery() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); + Document doc = new Document(); + StringField type = new StringField(TypeFieldMapper.NAME, "my_type", Store.NO); + doc.add(type); + w.addDocument(doc); + w.addDocument(doc); + IndexReader reader = DirectoryReader.open(w); + + TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType(); + ft.setName(TypeFieldMapper.NAME); + Query query = ft.termQuery("my_type", null); + + assertEquals(new MatchAllDocsQuery(), query.rewrite(reader)); + + // Make sure that Lucene actually simplifies the query when there is a single type + Query userQuery = new PhraseQuery("body", "quick", "fox"); + Query filteredQuery = new BooleanQuery.Builder().add(userQuery, Occur.MUST).add(query, Occur.FILTER).build(); + Query rewritten = new IndexSearcher(reader).rewrite(filteredQuery); + assertEquals(userQuery, rewritten); + + type.setStringValue("my_type2"); + w.addDocument(doc); + reader.close(); + reader = DirectoryReader.open(w); + + assertEquals(new ConstantScoreQuery(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type"))), query.rewrite(reader)); + + IOUtils.close(reader, w, dir); + } } diff --git a/core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java index 4715574b5a7..675ad954e03 100644 --- a/core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/HasChildQueryBuilderTests.java @@ -275,12 +275,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase { @Override @@ -39,14 +34,7 @@ public class TypeQueryBuilderTests extends AbstractQueryTestCase