Type filters should not have a performance impact when there is a single type. #17350

Today, if you call /index/type/_search instead of /index/_search, elasticsearch
will automatically insert a type filter to only match documents of the given
type. This commit uses a new TypeQuery instead of a TermQuery for this filter,
which rewrites to a MatchAllDocsQuery when all documents of a shard match the
filtered type. This is helpful because BooleanQuery has a special rewrite rule
to remove MatchAllDocsQuery as FILTER clauses. So for instance if your query is
`+body:"quick fox" #_type:my_type`, it will be rewritten to
`+body:"quick fox" #*:*` which is then rewritten to `body:"quick fox"`.
This commit is contained in:
Adrien Grand 2016-03-25 19:15:29 +01:00
parent 3e9f8a4c59
commit 4bd27bc2a0
5 changed files with 105 additions and 29 deletions

View File

@ -22,15 +22,16 @@ package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
@ -39,12 +40,12 @@ import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.query.QueryShardContext;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
*
@ -133,12 +134,55 @@ public class TypeFieldMapper extends MetadataFieldMapper {
@Override
public Query termQuery(Object value, @Nullable QueryShardContext context) {
if (indexOptions() == IndexOptions.NONE) {
return new ConstantScoreQuery(new PrefixQuery(new Term(UidFieldMapper.NAME, Uid.typePrefixAsBytes(BytesRefs.toBytesRef(value)))));
throw new AssertionError();
}
return new ConstantScoreQuery(new TermQuery(createTerm(value)));
return new TypeQuery(indexedValueForSearch(value));
}
}
public static class TypeQuery extends Query {
private final BytesRef type;
public TypeQuery(BytesRef type) {
this.type = Objects.requireNonNull(type);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Term term = new Term(CONTENT_TYPE, type);
TermContext context = TermContext.build(reader.getContext(), term);
if (context.docFreq() == reader.maxDoc()) {
// All docs have the same type.
// Using a match_all query will help Lucene perform some optimizations
// For instance, match_all queries as filter clauses are automatically removed
return new MatchAllDocsQuery();
} else {
return new ConstantScoreQuery(new TermQuery(term, context));
}
}
@Override
public boolean equals(Object obj) {
if (super.equals(obj) == false) {
return false;
}
TypeQuery that = (TypeQuery) obj;
return type.equals(that.type);
}
@Override
public int hashCode() {
return 31 * super.hashCode() + type.hashCode();
}
@Override
public String toString(String field) {
return "_type:" + type;
}
}
private TypeFieldMapper(Settings indexSettings, MappedFieldType existing) {
this(existing == null ? defaultFieldType(indexSettings) : existing.clone(),
indexSettings);

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.query;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
@ -74,15 +75,14 @@ public class TypeQueryBuilder extends AbstractQueryBuilder<TypeQueryBuilder> {
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
Query filter;
//LUCENE 4 UPGRADE document mapper should use bytesref as well?
DocumentMapper documentMapper = context.getMapperService().documentMapper(type.utf8ToString());
if (documentMapper == null) {
filter = new TermQuery(new Term(TypeFieldMapper.NAME, type));
// no type means no documents
return new MatchNoDocsQuery();
} else {
filter = documentMapper.typeFilter();
return documentMapper.typeFilter();
}
return filter;
}
@Override

View File

@ -18,6 +18,23 @@
*/
package org.elasticsearch.index.mapper.internal;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
@ -26,4 +43,36 @@ public class TypeFieldTypeTests extends FieldTypeTestCase {
protected MappedFieldType createDefaultFieldType() {
return new TypeFieldMapper.TypeFieldType();
}
public void testTermQuery() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
StringField type = new StringField(TypeFieldMapper.NAME, "my_type", Store.NO);
doc.add(type);
w.addDocument(doc);
w.addDocument(doc);
IndexReader reader = DirectoryReader.open(w);
TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType();
ft.setName(TypeFieldMapper.NAME);
Query query = ft.termQuery("my_type", null);
assertEquals(new MatchAllDocsQuery(), query.rewrite(reader));
// Make sure that Lucene actually simplifies the query when there is a single type
Query userQuery = new PhraseQuery("body", "quick", "fox");
Query filteredQuery = new BooleanQuery.Builder().add(userQuery, Occur.MUST).add(query, Occur.FILTER).build();
Query rewritten = new IndexSearcher(reader).rewrite(filteredQuery);
assertEquals(userQuery, rewritten);
type.setStringValue("my_type2");
w.addDocument(doc);
reader.close();
reader = DirectoryReader.open(w);
assertEquals(new ConstantScoreQuery(new TermQuery(new Term(TypeFieldMapper.NAME, "my_type"))), query.rewrite(reader));
IOUtils.close(reader, w, dir);
}
}

View File

@ -275,12 +275,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase<HasChildQue
assertThat(termQuery.getTerm().bytes(), equalTo(ids[0]));
//check the type filter
assertThat(booleanQuery.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.FILTER));
assertThat(booleanQuery.clauses().get(1).getQuery(), instanceOf(ConstantScoreQuery.class));
ConstantScoreQuery typeConstantScoreQuery = (ConstantScoreQuery) booleanQuery.clauses().get(1).getQuery();
assertThat(typeConstantScoreQuery.getQuery(), instanceOf(TermQuery.class));
TermQuery typeTermQuery = (TermQuery) typeConstantScoreQuery.getQuery();
assertThat(typeTermQuery.getTerm().field(), equalTo(TypeFieldMapper.NAME));
assertThat(typeTermQuery.getTerm().text(), equalTo(type));
assertEquals(new TypeFieldMapper.TypeQuery(new BytesRef(type)), booleanQuery.clauses().get(1).getQuery());
}
/**

View File

@ -19,17 +19,12 @@
package org.elasticsearch.index.query;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
import java.io.IOException;
import static org.hamcrest.Matchers.either;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
public class TypeQueryBuilderTests extends AbstractQueryTestCase<TypeQueryBuilder> {
@Override
@ -39,14 +34,7 @@ public class TypeQueryBuilderTests extends AbstractQueryTestCase<TypeQueryBuilde
@Override
protected void doAssertLuceneQuery(TypeQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
assertThat(query, either(instanceOf(TermQuery.class)).or(instanceOf(ConstantScoreQuery.class)));
if (query instanceof ConstantScoreQuery) {
query = ((ConstantScoreQuery) query).getQuery();
assertThat(query, instanceOf(TermQuery.class));
}
TermQuery termQuery = (TermQuery) query;
assertThat(termQuery.getTerm().field(), equalTo(TypeFieldMapper.NAME));
assertThat(termQuery.getTerm().text(), equalTo(queryBuilder.type()));
assertEquals(new TypeFieldMapper.TypeQuery(new BytesRef(queryBuilder.type())), query);
}
public void testIllegalArgument() {