mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-28 16:09:10 +00:00
Optimize counts on simple queries.
Today we always collect in order to compute counts, but some of them can be easily optimized by using pre-computed index statistics. This is especially true in the case that there are no deletions, which should be common for the time-based data use-case.

Counts on match_all queries can always be optimized, so requests like

```
GET index/_search?size=0

GET index/_search
{
  "size": 0,
  "query" : {
    "match_all": {}
  }
}
```

should now return almost instantly. Additionally, when there are no deletions, term queries are also optimized, so the below queries which all boil down to a single term query would also return almost immediately:

```
GET index/type/_search?size=0

GET index/_search
{
  "size": 0,
  "query" : {
    "match": {
      "foo": "bar"
    }
  }
}

GET index/_search
{
  "size": 0,
  "query" : {
    "constant_score": {
      "filter": {
        "exists": {
          "field": "foo"
        }
      }
    }
  }
}
```
This commit is contained in:
parent
bd44dbe5cd
commit
5ff9ca9965
@ -20,6 +20,9 @@
|
|||||||
package org.elasticsearch.search.query;
|
package org.elasticsearch.search.query;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queries.MinDocQuery;
|
import org.apache.lucene.queries.MinDocQuery;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.elasticsearch.action.search.SearchType;
|
import org.elasticsearch.action.search.SearchType;
|
||||||
@ -100,23 +103,39 @@ public class QueryPhase implements SearchPhase {
|
|||||||
// here to make sure it happens during the QUERY phase
|
// here to make sure it happens during the QUERY phase
|
||||||
aggregationPhase.preProcess(searchContext);
|
aggregationPhase.preProcess(searchContext);
|
||||||
|
|
||||||
searchContext.queryResult().searchTimedOut(false);
|
boolean rescore = execute(searchContext, searchContext.searcher());
|
||||||
|
|
||||||
|
if (rescore) { // only if we do a regular search
|
||||||
|
rescorePhase.execute(searchContext);
|
||||||
|
}
|
||||||
|
suggestPhase.execute(searchContext);
|
||||||
|
aggregationPhase.execute(searchContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* In a package-private method so that it can be tested without having to
|
||||||
|
* wire everything (mapperService, etc.)
|
||||||
|
* @return whether the rescoring phase should be executed
|
||||||
|
*/
|
||||||
|
static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
|
||||||
|
QuerySearchResult queryResult = searchContext.queryResult();
|
||||||
|
queryResult.searchTimedOut(false);
|
||||||
|
|
||||||
final SearchType searchType = searchContext.searchType();
|
final SearchType searchType = searchContext.searchType();
|
||||||
boolean rescore = false;
|
boolean rescore = false;
|
||||||
try {
|
try {
|
||||||
searchContext.queryResult().from(searchContext.from());
|
queryResult.from(searchContext.from());
|
||||||
searchContext.queryResult().size(searchContext.size());
|
queryResult.size(searchContext.size());
|
||||||
|
|
||||||
final IndexSearcher searcher = searchContext.searcher();
|
|
||||||
Query query = searchContext.query();
|
Query query = searchContext.query();
|
||||||
|
|
||||||
final int totalNumDocs = searcher.getIndexReader().numDocs();
|
final int totalNumDocs = searcher.getIndexReader().numDocs();
|
||||||
int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
|
int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
|
||||||
|
|
||||||
Collector collector;
|
Collector collector;
|
||||||
final Callable<TopDocs> topDocsCallable;
|
Callable<TopDocs> topDocsCallable;
|
||||||
|
|
||||||
|
assert query == searcher.rewrite(query); // already rewritten
|
||||||
if (searchContext.size() == 0) { // no matter what the value of from is
|
if (searchContext.size() == 0) { // no matter what the value of from is
|
||||||
final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
|
final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
|
||||||
collector = totalHitCountCollector;
|
collector = totalHitCountCollector;
|
||||||
@ -240,36 +259,75 @@ public class QueryPhase implements SearchPhase {
|
|||||||
collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
|
collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (collector.getClass() == TotalHitCountCollector.class) {
|
||||||
|
// Optimize counts in simple cases to return in constant time
|
||||||
|
// instead of using a collector
|
||||||
|
while (true) {
|
||||||
|
// remove wrappers that don't matter for counts
|
||||||
|
// this is necessary so that we don't only optimize match_all
|
||||||
|
// queries but also match_all queries that are nested in
|
||||||
|
// a constant_score query
|
||||||
|
if (query instanceof ConstantScoreQuery) {
|
||||||
|
query = ((ConstantScoreQuery) query).getQuery();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query.getClass() == MatchAllDocsQuery.class) {
|
||||||
|
collector = null;
|
||||||
|
topDocsCallable = new Callable<TopDocs>() {
|
||||||
|
@Override
|
||||||
|
public TopDocs call() throws Exception {
|
||||||
|
int count = searcher.getIndexReader().numDocs();
|
||||||
|
return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
|
||||||
|
final Term term = ((TermQuery) query).getTerm();
|
||||||
|
collector = null;
|
||||||
|
topDocsCallable = new Callable<TopDocs>() {
|
||||||
|
@Override
|
||||||
|
public TopDocs call() throws Exception {
|
||||||
|
int count = 0;
|
||||||
|
for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
|
||||||
|
count += context.reader().docFreq(term);
|
||||||
|
}
|
||||||
|
return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
final boolean timeoutSet = searchContext.timeoutInMillis() != SearchService.NO_TIMEOUT.millis();
|
final boolean timeoutSet = searchContext.timeoutInMillis() != SearchService.NO_TIMEOUT.millis();
|
||||||
if (timeoutSet) {
|
if (timeoutSet && collector != null) { // collector might be null if no collection is actually needed
|
||||||
// TODO: change to use our own counter that uses the scheduler in ThreadPool
|
// TODO: change to use our own counter that uses the scheduler in ThreadPool
|
||||||
// throws TimeLimitingCollector.TimeExceededException when timeout has reached
|
// throws TimeLimitingCollector.TimeExceededException when timeout has reached
|
||||||
collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeoutInMillis());
|
collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeoutInMillis());
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
searchContext.searcher().search(query, collector);
|
if (collector != null) {
|
||||||
|
searcher.search(query, collector);
|
||||||
|
}
|
||||||
} catch (TimeLimitingCollector.TimeExceededException e) {
|
} catch (TimeLimitingCollector.TimeExceededException e) {
|
||||||
assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
|
assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
|
||||||
searchContext.queryResult().searchTimedOut(true);
|
queryResult.searchTimedOut(true);
|
||||||
} catch (Lucene.EarlyTerminationException e) {
|
} catch (Lucene.EarlyTerminationException e) {
|
||||||
assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
|
assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
|
||||||
searchContext.queryResult().terminatedEarly(true);
|
queryResult.terminatedEarly(true);
|
||||||
} finally {
|
} finally {
|
||||||
searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
|
searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
|
||||||
}
|
}
|
||||||
if (terminateAfterSet && searchContext.queryResult().terminatedEarly() == null) {
|
if (terminateAfterSet && queryResult.terminatedEarly() == null) {
|
||||||
searchContext.queryResult().terminatedEarly(false);
|
queryResult.terminatedEarly(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
searchContext.queryResult().topDocs(topDocsCallable.call());
|
queryResult.topDocs(topDocsCallable.call());
|
||||||
|
|
||||||
|
return rescore;
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
|
throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
|
||||||
}
|
}
|
||||||
if (rescore) { // only if we do a regular search
|
|
||||||
rescorePhase.execute(searchContext);
|
|
||||||
}
|
|
||||||
suggestPhase.execute(searchContext);
|
|
||||||
aggregationPhase.execute(searchContext);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,167 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.search.query;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.MultiReader;
|
||||||
|
import org.apache.lucene.index.NoMergePolicy;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.Collector;
|
||||||
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.elasticsearch.index.query.ParsedQuery;
|
||||||
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
import org.elasticsearch.test.TestSearchContext;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
public class QueryPhaseTests extends ESTestCase {
|
||||||
|
|
||||||
|
private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
|
||||||
|
TestSearchContext context = new TestSearchContext();
|
||||||
|
context.parsedQuery(new ParsedQuery(query));
|
||||||
|
context.setSize(0);
|
||||||
|
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
final AtomicBoolean collected = new AtomicBoolean();
|
||||||
|
IndexSearcher contextSearcher = new IndexSearcher(reader) {
|
||||||
|
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
|
||||||
|
collected.set(true);
|
||||||
|
super.search(leaves, weight, collector);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
final boolean rescore = QueryPhase.execute(context, contextSearcher);
|
||||||
|
assertFalse(rescore);
|
||||||
|
assertEquals(searcher.count(query), context.queryResult().topDocs().totalHits);
|
||||||
|
assertEquals(shouldCollect, collected.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void countTestCase(boolean withDeletions) throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir, iwc);
|
||||||
|
final int numDocs = scaledRandomIntBetween(100, 200);
|
||||||
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
|
Document doc = new Document();
|
||||||
|
if (randomBoolean()) {
|
||||||
|
doc.add(new StringField("foo", "bar", Store.NO));
|
||||||
|
}
|
||||||
|
if (randomBoolean()) {
|
||||||
|
doc.add(new StringField("foo", "baz", Store.NO));
|
||||||
|
}
|
||||||
|
if (withDeletions && (rarely() || i == 0)) {
|
||||||
|
doc.add(new StringField("delete", "yes", Store.NO));
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
if (withDeletions) {
|
||||||
|
w.deleteDocuments(new Term("delete", "yes"));
|
||||||
|
}
|
||||||
|
final IndexReader reader = w.getReader();
|
||||||
|
Query matchAll = new MatchAllDocsQuery();
|
||||||
|
Query matchAllCsq = new ConstantScoreQuery(matchAll);
|
||||||
|
Query tq = new TermQuery(new Term("foo", "bar"));
|
||||||
|
Query tCsq = new ConstantScoreQuery(tq);
|
||||||
|
BooleanQuery bq = new BooleanQuery();
|
||||||
|
bq.add(matchAll, Occur.SHOULD);
|
||||||
|
bq.add(tq, Occur.MUST);
|
||||||
|
|
||||||
|
countTestCase(matchAll, reader, false);
|
||||||
|
countTestCase(matchAllCsq, reader, false);
|
||||||
|
countTestCase(tq, reader, withDeletions);
|
||||||
|
countTestCase(tCsq, reader, withDeletions);
|
||||||
|
countTestCase(bq, reader, true);
|
||||||
|
reader.close();
|
||||||
|
w.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCountWithoutDeletions() throws Exception {
|
||||||
|
countTestCase(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testCountWithDeletions() throws Exception {
|
||||||
|
countTestCase(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPostFilterDisablesCountOptimization() throws Exception {
|
||||||
|
TestSearchContext context = new TestSearchContext();
|
||||||
|
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
|
||||||
|
context.setSize(0);
|
||||||
|
|
||||||
|
final AtomicBoolean collected = new AtomicBoolean();
|
||||||
|
IndexSearcher contextSearcher = new IndexSearcher(new MultiReader()) {
|
||||||
|
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
|
||||||
|
collected.set(true);
|
||||||
|
super.search(leaves, weight, collector);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
QueryPhase.execute(context, contextSearcher);
|
||||||
|
assertEquals(0, context.queryResult().topDocs().totalHits);
|
||||||
|
assertFalse(collected.get());
|
||||||
|
|
||||||
|
context.parsedPostFilter(new ParsedQuery(new MatchNoDocsQuery()));
|
||||||
|
QueryPhase.execute(context, contextSearcher);
|
||||||
|
assertEquals(0, context.queryResult().topDocs().totalHits);
|
||||||
|
assertTrue(collected.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMinScoreDisablesCountOptimization() throws Exception {
|
||||||
|
TestSearchContext context = new TestSearchContext();
|
||||||
|
context.parsedQuery(new ParsedQuery(new MatchAllDocsQuery()));
|
||||||
|
context.setSize(0);
|
||||||
|
|
||||||
|
final AtomicBoolean collected = new AtomicBoolean();
|
||||||
|
IndexSearcher contextSearcher = new IndexSearcher(new MultiReader()) {
|
||||||
|
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
|
||||||
|
collected.set(true);
|
||||||
|
super.search(leaves, weight, collector);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
QueryPhase.execute(context, contextSearcher);
|
||||||
|
assertEquals(0, context.queryResult().topDocs().totalHits);
|
||||||
|
assertFalse(collected.get());
|
||||||
|
|
||||||
|
context.minimumScore(1);
|
||||||
|
QueryPhase.execute(context, contextSearcher);
|
||||||
|
assertEquals(0, context.queryResult().topDocs().totalHits);
|
||||||
|
assertTrue(collected.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -83,6 +83,12 @@ public class TestSearchContext extends SearchContext {
|
|||||||
final ThreadPool threadPool;
|
final ThreadPool threadPool;
|
||||||
final Map<Class<?>, Collector> queryCollectors = new HashMap<>();
|
final Map<Class<?>, Collector> queryCollectors = new HashMap<>();
|
||||||
final IndexShard indexShard;
|
final IndexShard indexShard;
|
||||||
|
final Counter timeEstimateCounter = Counter.newCounter();
|
||||||
|
final QuerySearchResult queryResult = new QuerySearchResult();
|
||||||
|
ParsedQuery originalQuery;
|
||||||
|
ParsedQuery postFilter;
|
||||||
|
Query query;
|
||||||
|
Float minScore;
|
||||||
|
|
||||||
ContextIndexSearcher searcher;
|
ContextIndexSearcher searcher;
|
||||||
int size;
|
int size;
|
||||||
@ -363,12 +369,13 @@ public class TestSearchContext extends SearchContext {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchContext minimumScore(float minimumScore) {
|
public SearchContext minimumScore(float minimumScore) {
|
||||||
return null;
|
this.minScore = minimumScore;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Float minimumScore() {
|
public Float minimumScore() {
|
||||||
return null;
|
return minScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -393,12 +400,13 @@ public class TestSearchContext extends SearchContext {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchContext parsedPostFilter(ParsedQuery postFilter) {
|
public SearchContext parsedPostFilter(ParsedQuery postFilter) {
|
||||||
return null;
|
this.postFilter = postFilter;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ParsedQuery parsedPostFilter() {
|
public ParsedQuery parsedPostFilter() {
|
||||||
return null;
|
return postFilter;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -408,17 +416,19 @@ public class TestSearchContext extends SearchContext {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SearchContext parsedQuery(ParsedQuery query) {
|
public SearchContext parsedQuery(ParsedQuery query) {
|
||||||
return null;
|
this.originalQuery = query;
|
||||||
|
this.query = query.query();
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ParsedQuery parsedQuery() {
|
public ParsedQuery parsedQuery() {
|
||||||
return null;
|
return originalQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Query query() {
|
public Query query() {
|
||||||
return null;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -537,7 +547,7 @@ public class TestSearchContext extends SearchContext {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public QuerySearchResult queryResult() {
|
public QuerySearchResult queryResult() {
|
||||||
return null;
|
return queryResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -580,7 +590,7 @@ public class TestSearchContext extends SearchContext {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Counter timeEstimateCounter() {
|
public Counter timeEstimateCounter() {
|
||||||
throw new UnsupportedOperationException();
|
return timeEstimateCounter;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
Loading…
x
Reference in New Issue
Block a user