From 78c2f1063ac4a38943ea53fdd646cdb5646362f4 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 18 Aug 2015 17:01:47 +0200 Subject: [PATCH] Optimize sorted scroll when sorting by `_doc`. This change means that we will be able to remove the `SCAN` search type in 3.0 and recommend users to use sorted scrolls instead. --- .../apache/lucene/queries/MinDocQuery.java | 119 ++++++++++++++++++ .../percolator/PercolateContext.java | 17 +-- .../elasticsearch/search/SearchService.java | 39 ++++-- .../search/internal/DefaultSearchContext.java | 21 +--- .../internal/FilteredSearchContext.java | 20 +-- .../search/internal/ScrollContext.java | 33 +++++ .../search/internal/SearchContext.java | 9 +- .../search/internal/SubSearchContext.java | 10 +- .../search/query/QueryPhase.java | 73 ++++++++--- .../search/scan/ScanContext.java | 96 +------------- .../lucene/queries/MinDocQueryTests.java | 61 +++++++++ .../search/scan/ScanContextTests.java | 29 ----- .../search/scroll/DuelScrollIT.java | 83 ++++++++++++ .../elasticsearch/test/TestSearchContext.java | 29 +++-- 14 files changed, 407 insertions(+), 232 deletions(-) create mode 100644 core/src/main/java/org/apache/lucene/queries/MinDocQuery.java create mode 100644 core/src/main/java/org/elasticsearch/search/internal/ScrollContext.java create mode 100644 core/src/test/java/org/apache/lucene/queries/MinDocQueryTests.java diff --git a/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java b/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java new file mode 100644 index 00000000000..169c017804b --- /dev/null +++ b/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java @@ -0,0 +1,119 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.queries; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Bits; + +import java.io.IOException; + +/** A {@link Query} that only matches documents that are greater than or equal + * to a configured doc ID. */ +public final class MinDocQuery extends Query { + + private final int minDoc; + + /** Sole constructor. */ + public MinDocQuery(int minDoc) { + this.minDoc = minDoc; + } + + @Override + public int hashCode() { + return 31 * super.hashCode() + minDoc; + } + + @Override + public boolean equals(Object obj) { + if (super.equals(obj) == false) { + return false; + } + MinDocQuery that = (MinDocQuery) obj; + return minDoc == that.minDoc; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return new ConstantScoreWeight(this) { + @Override + public Scorer scorer(LeafReaderContext context, final Bits acceptDocs) throws IOException { + final int maxDoc = context.reader().maxDoc(); + if (context.docBase + maxDoc <= minDoc) { + return null; + } + final int segmentMinDoc = Math.max(0, minDoc - context.docBase); + final DocIdSetIterator disi = new DocIdSetIterator() { + + int doc = -1; + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() throws IOException { + return advance(doc + 1); + } + + @Override + public int advance(int target) throws IOException { + assert target > doc; + if (doc == -1) { + // skip directly to minDoc + doc = Math.max(target, segmentMinDoc); + } else { + doc = target; + } + while (doc < maxDoc) { + if (acceptDocs == null || acceptDocs.get(doc)) { + break; + } + doc += 1; + } + if (doc >= maxDoc) { + doc = NO_MORE_DOCS; + } + return doc; + } + + @Override + public long cost() { + return maxDoc - segmentMinDoc; + } + + }; + return new ConstantScoreScorer(this, score(), disi); + } + }; + } + + @Override + public String toString(String field) { + return "MinDocQuery(minDoc=" + minDoc + ")"; + } +} diff --git a/core/src/main/java/org/elasticsearch/percolator/PercolateContext.java b/core/src/main/java/org/elasticsearch/percolator/PercolateContext.java index 3e79ef4e783..d42b858ce7b 100644 --- a/core/src/main/java/org/elasticsearch/percolator/PercolateContext.java +++ b/core/src/main/java/org/elasticsearch/percolator/PercolateContext.java @@ -27,7 +27,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; @@ -53,7 +52,6 @@ import org.elasticsearch.index.query.ParsedQuery; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.script.ScriptService; -import org.elasticsearch.search.Scroll; import org.elasticsearch.search.SearchHitField; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.SearchContextAggregations; @@ -68,6 +66,7 @@ import org.elasticsearch.search.highlight.SearchContextHighlight; import org.elasticsearch.search.internal.ContextIndexSearcher; import org.elasticsearch.search.internal.InternalSearchHit; import org.elasticsearch.search.internal.InternalSearchHitField; +import org.elasticsearch.search.internal.ScrollContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.ShardSearchRequest; import org.elasticsearch.search.lookup.LeafSearchLookup; @@ -348,12 +347,12 @@ public class PercolateContext extends SearchContext { } @Override - public Scroll scroll() { + public ScrollContext scrollContext() { throw new UnsupportedOperationException(); } @Override - public SearchContext scroll(Scroll scroll) { + public SearchContext scrollContext(ScrollContext scroll) { throw new UnsupportedOperationException(); } @@ -621,16 +620,6 @@ public class PercolateContext extends SearchContext { throw new UnsupportedOperationException(); } - @Override - public void lastEmittedDoc(ScoreDoc doc) { - throw new UnsupportedOperationException(); - } - - @Override - public ScoreDoc lastEmittedDoc() { - throw new UnsupportedOperationException(); - } - @Override public DfsSearchResult dfsResult() { throw new UnsupportedOperationException(); diff --git a/core/src/main/java/org/elasticsearch/search/SearchService.java b/core/src/main/java/org/elasticsearch/search/SearchService.java index 50c5053c505..fe85bba2919 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchService.java +++ b/core/src/main/java/org/elasticsearch/search/SearchService.java @@ -271,7 +271,7 @@ public class SearchService extends AbstractLifecycleComponent { throw new IllegalArgumentException("aggregations are not supported with search_type=scan"); } - if (context.scroll() == null) { + if (context.scrollContext() == null || context.scrollContext().scroll == null) { throw new ElasticsearchException("Scroll must be provided when scanning..."); } @@ -319,7 +319,7 @@ public class SearchService extends AbstractLifecycleComponent { try { shortcutDocIdsToLoadForScanning(context); fetchPhase.execute(context); - if (context.scroll() == null || context.fetchResult().hits().hits().length < context.size()) { + if (context.scrollContext() == null || context.fetchResult().hits().hits().length < context.size()) { freeContext(request.id()); } else { contextProcessedSuccessfully(context); @@ -362,7 +362,7 @@ public class SearchService extends AbstractLifecycleComponent { loadOrExecuteQueryPhase(request, context, queryPhase); - if (context.queryResult().topDocs().scoreDocs.length == 0 && context.scroll() == null) { + if (context.queryResult().topDocs().scoreDocs.length == 0 && context.scrollContext() == null) { freeContext(context.id()); } else { contextProcessedSuccessfully(context); @@ -416,7 +416,7 @@ public class SearchService extends AbstractLifecycleComponent { shardSearchStats.onPreQueryPhase(context); long time = System.nanoTime(); queryPhase.execute(context); - if (context.queryResult().topDocs().scoreDocs.length == 0 && context.scroll() == null) { + if (context.queryResult().topDocs().scoreDocs.length == 0 && context.scrollContext() == null) { // no hits, we can release the context since there will be no fetch phase freeContext(context.id()); } else { @@ -434,6 +434,16 @@ public class SearchService extends AbstractLifecycleComponent { } } + private boolean fetchPhaseShouldFreeContext(SearchContext context) { + if (context.scrollContext() == null) { + // simple search, no scroll + return true; + } else { + // scroll request, but the scroll was not extended + return context.scrollContext().scroll == null; + } + } + public QueryFetchSearchResult executeFetchPhase(ShardSearchRequest request) { final SearchContext context = createAndPutContext(request); contextProcessing(context); @@ -453,7 +463,7 @@ public class SearchService extends AbstractLifecycleComponent { try { shortcutDocIdsToLoad(context); fetchPhase.execute(context); - if (context.scroll() == null) { + if (fetchPhaseShouldFreeContext(context)) { freeContext(context.id()); } else { contextProcessedSuccessfully(context); @@ -493,7 +503,7 @@ public class SearchService extends AbstractLifecycleComponent { try { shortcutDocIdsToLoad(context); fetchPhase.execute(context); - if (context.scroll() == null) { + if (fetchPhaseShouldFreeContext(context)) { freeContext(request.id()); } else { contextProcessedSuccessfully(context); @@ -533,7 +543,7 @@ public class SearchService extends AbstractLifecycleComponent { try { shortcutDocIdsToLoad(context); fetchPhase.execute(context); - if (context.scroll() == null) { + if (fetchPhaseShouldFreeContext(context)) { freeContext(request.id()); } else { contextProcessedSuccessfully(context); @@ -559,13 +569,13 @@ public class SearchService extends AbstractLifecycleComponent { final ShardSearchStats shardSearchStats = context.indexShard().searchService(); try { if (request.lastEmittedDoc() != null) { - context.lastEmittedDoc(request.lastEmittedDoc()); + context.scrollContext().lastEmittedDoc = request.lastEmittedDoc(); } context.docIdsToLoad(request.docIds(), 0, request.docIdsSize()); shardSearchStats.onPreFetchPhase(context); long time = System.nanoTime(); fetchPhase.execute(context); - if (context.scroll() == null) { + if (fetchPhaseShouldFreeContext(context)) { freeContext(request.id()); } else { contextProcessedSuccessfully(context); @@ -620,7 +630,10 @@ public class SearchService extends AbstractLifecycleComponent { SearchContext context = new DefaultSearchContext(idGenerator.incrementAndGet(), request, shardTarget, engineSearcher, indexService, indexShard, scriptService, pageCacheRecycler, bigArrays, threadPool.estimatedTimeInMillisCounter(), parseFieldMatcher, defaultSearchTimeout); SearchContext.setCurrent(context); try { - context.scroll(request.scroll()); + if (request.scroll() != null) { + context.scrollContext(new ScrollContext()); + context.scrollContext().scroll = request.scroll(); + } parseTemplate(request, context); parseSource(context, request.source()); @@ -673,7 +686,7 @@ public class SearchService extends AbstractLifecycleComponent { if (context != null) { try { context.indexShard().searchService().onFreeContext(context); - if (context.scroll() != null) { + if (context.scrollContext() != null) { context.indexShard().searchService().onFreeScrollContext(context); } } finally { @@ -686,7 +699,7 @@ public class SearchService extends AbstractLifecycleComponent { public void freeAllScrollContexts() { for (SearchContext searchContext : activeContexts.values()) { - if (searchContext.scroll() != null) { + if (searchContext.scrollContext() != null) { freeContext(searchContext.id()); } } @@ -880,7 +893,7 @@ public class SearchService extends AbstractLifecycleComponent { private void processScroll(InternalScrollSearchRequest request, SearchContext context) { // process scroll context.from(context.from() + context.size()); - context.scroll(request.scroll()); + context.scrollContext().scroll = request.scroll(); // update the context keep alive based on the new scroll value if (request.scroll() != null && request.scroll().keepAlive() != null) { context.keepAlive(request.scroll().keepAlive().millis()); diff --git a/core/src/main/java/org/elasticsearch/search/internal/DefaultSearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/DefaultSearchContext.java index 03c13c61e93..a3015b94bab 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/DefaultSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/DefaultSearchContext.java @@ -49,7 +49,6 @@ import org.elasticsearch.index.query.ParsedQuery; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.script.ScriptService; -import org.elasticsearch.search.Scroll; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.SearchContextAggregations; import org.elasticsearch.search.dfs.DfsSearchResult; @@ -98,7 +97,7 @@ public class DefaultSearchContext extends SearchContext { // terminate after count private int terminateAfter = DEFAULT_TERMINATE_AFTER; private List groupStats; - private Scroll scroll; + private ScrollContext scrollContext; private boolean explain; private boolean version = false; // by default, we don't return versions private List fieldNames; @@ -290,13 +289,13 @@ public class DefaultSearchContext extends SearchContext { } @Override - public Scroll scroll() { - return this.scroll; + public ScrollContext scrollContext() { + return this.scrollContext; } @Override - public SearchContext scroll(Scroll scroll) { - this.scroll = scroll; + public SearchContext scrollContext(ScrollContext scrollContext) { + this.scrollContext = scrollContext; return this; } @@ -652,16 +651,6 @@ public class DefaultSearchContext extends SearchContext { this.keepAlive = keepAlive; } - @Override - public void lastEmittedDoc(ScoreDoc doc) { - this.lastEmittedDoc = doc; - } - - @Override - public ScoreDoc lastEmittedDoc() { - return lastEmittedDoc; - } - @Override public SearchLookup lookup() { // TODO: The types should take into account the parsing context in QueryParserContext... diff --git a/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java index e2f6b48f02d..2f79d03234e 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java @@ -23,7 +23,6 @@ import com.carrotsearch.hppc.ObjectObjectAssociativeContainer; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.util.Counter; import org.elasticsearch.action.search.SearchType; @@ -42,7 +41,6 @@ import org.elasticsearch.index.query.ParsedQuery; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.script.ScriptService; -import org.elasticsearch.search.Scroll; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.SearchContextAggregations; import org.elasticsearch.search.dfs.DfsSearchResult; @@ -154,13 +152,13 @@ public abstract class FilteredSearchContext extends SearchContext { } @Override - public Scroll scroll() { - return in.scroll(); + public ScrollContext scrollContext() { + return in.scrollContext(); } @Override - public SearchContext scroll(Scroll scroll) { - return in.scroll(scroll); + public SearchContext scrollContext(ScrollContext scroll) { + return in.scrollContext(scroll); } @Override @@ -483,16 +481,6 @@ public abstract class FilteredSearchContext extends SearchContext { in.keepAlive(keepAlive); } - @Override - public void lastEmittedDoc(ScoreDoc doc) { - in.lastEmittedDoc(doc); - } - - @Override - public ScoreDoc lastEmittedDoc() { - return in.lastEmittedDoc(); - } - @Override public SearchLookup lookup() { return in.lookup(); diff --git a/core/src/main/java/org/elasticsearch/search/internal/ScrollContext.java b/core/src/main/java/org/elasticsearch/search/internal/ScrollContext.java new file mode 100644 index 00000000000..1744b6fd745 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/internal/ScrollContext.java @@ -0,0 +1,33 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.internal; + +import org.apache.lucene.search.ScoreDoc; +import org.elasticsearch.search.Scroll; + +/** Wrapper around information that needs to stay around when scrolling. */ +public class ScrollContext { + + public int totalHits = -1; + public float maxScore; + public ScoreDoc lastEmittedDoc; + public Scroll scroll; + +} diff --git a/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java index 72feec7ba3e..781d13a3b86 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -24,7 +24,6 @@ import com.google.common.collect.MultimapBuilder; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.util.Counter; import org.elasticsearch.action.search.SearchType; @@ -159,9 +158,9 @@ public abstract class SearchContext implements Releasable, HasContextAndHeaders protected abstract long nowInMillisImpl(); - public abstract Scroll scroll(); + public abstract ScrollContext scrollContext(); - public abstract SearchContext scroll(Scroll scroll); + public abstract SearchContext scrollContext(ScrollContext scroll); public abstract SearchContextAggregations aggregations(); @@ -303,10 +302,6 @@ public abstract class SearchContext implements Releasable, HasContextAndHeaders public abstract void keepAlive(long keepAlive); - public abstract void lastEmittedDoc(ScoreDoc doc); - - public abstract ScoreDoc lastEmittedDoc(); - public abstract SearchLookup lookup(); public abstract DfsSearchResult dfsResult(); diff --git a/core/src/main/java/org/elasticsearch/search/internal/SubSearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/SubSearchContext.java index f70c9a7e1a4..a1a6fd0abff 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/SubSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/SubSearchContext.java @@ -21,13 +21,10 @@ package org.elasticsearch.search.internal; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.util.Counter; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.index.query.ParsedQuery; -import org.elasticsearch.search.Scroll; import org.elasticsearch.search.aggregations.SearchContextAggregations; import org.elasticsearch.search.fetch.FetchSearchResult; import org.elasticsearch.search.fetch.innerhits.InnerHitsContext; @@ -101,7 +98,7 @@ public class SubSearchContext extends FilteredSearchContext { } @Override - public SearchContext scroll(Scroll scroll) { + public SearchContext scrollContext(ScrollContext scrollContext) { throw new UnsupportedOperationException("Not supported"); } @@ -304,11 +301,6 @@ public class SubSearchContext extends FilteredSearchContext { throw new UnsupportedOperationException("Not supported"); } - @Override - public void lastEmittedDoc(ScoreDoc doc) { - throw new UnsupportedOperationException("Not supported"); - } - @Override public QuerySearchResult queryResult() { return querySearchResult; diff --git a/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java index a9105d59237..8f21f460130 100644 --- a/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -21,12 +21,16 @@ package org.elasticsearch.search.query; import com.google.common.collect.ImmutableMap; +import org.apache.lucene.queries.MinDocQuery; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; import org.apache.lucene.search.TimeLimitingCollector; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocsCollector; @@ -43,8 +47,8 @@ import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchPhase; import org.elasticsearch.search.SearchService; import org.elasticsearch.search.aggregations.AggregationPhase; +import org.elasticsearch.search.internal.ScrollContext; import org.elasticsearch.search.internal.SearchContext; -import org.elasticsearch.search.internal.SearchContext.Lifetime; import org.elasticsearch.search.rescore.RescorePhase; import org.elasticsearch.search.rescore.RescoreSearchContext; import org.elasticsearch.search.scan.ScanContext.ScanCollector; @@ -52,7 +56,6 @@ import org.elasticsearch.search.sort.SortParseElement; import org.elasticsearch.search.sort.TrackScoresParseElement; import org.elasticsearch.search.suggest.SuggestPhase; -import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -115,6 +118,7 @@ public class QueryPhase implements SearchPhase { searchContext.queryResult().searchTimedOut(false); + final SearchType searchType = searchContext.searchType(); boolean rescore = false; try { searchContext.queryResult().from(searchContext.from()); @@ -138,7 +142,7 @@ public class QueryPhase implements SearchPhase { return new TopDocs(totalHitCountCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0); } }; - } else if (searchContext.searchType() == SearchType.SCAN) { + } else if (searchType == SearchType.SCAN) { query = searchContext.scanContext().wrapQuery(query); final ScanCollector scanCollector = searchContext.scanContext().collector(searchContext); collector = scanCollector; @@ -150,11 +154,32 @@ public class QueryPhase implements SearchPhase { }; } else { // Perhaps have a dedicated scroll phase? + final ScrollContext scrollContext = searchContext.scrollContext(); + assert (scrollContext != null) == (searchContext.request().scroll() != null); final TopDocsCollector topDocsCollector; ScoreDoc lastEmittedDoc; if (searchContext.request().scroll() != null) { numDocs = Math.min(searchContext.size(), totalNumDocs); - lastEmittedDoc = searchContext.lastEmittedDoc(); + lastEmittedDoc = scrollContext.lastEmittedDoc; + + if (Sort.INDEXORDER.equals(searchContext.sort())) { + if (scrollContext.totalHits == -1) { + // first round + assert scrollContext.lastEmittedDoc == null; + // there is not much that we can optimize here since we want to collect all + // documents in order to get the total number of hits + } else { + // now this gets interesting: since we sort in index-order, we can directly + // skip to the desired doc and stop collecting after ${size} matches + if (scrollContext.lastEmittedDoc != null) { + BooleanQuery bq = new BooleanQuery(); + bq.add(query, Occur.MUST); + bq.add(new MinDocQuery(lastEmittedDoc.doc + 1), Occur.FILTER); + query = bq; + } + searchContext.terminateAfter(numDocs); + } + } } else { lastEmittedDoc = null; } @@ -177,7 +202,31 @@ public class QueryPhase implements SearchPhase { topDocsCallable = new Callable() { @Override public TopDocs call() throws Exception { - return topDocsCollector.topDocs(); + TopDocs topDocs = topDocsCollector.topDocs(); + if (scrollContext != null) { + if (scrollContext.totalHits == -1) { + // first round + scrollContext.totalHits = topDocs.totalHits; + scrollContext.maxScore = topDocs.getMaxScore(); + } else { + // subsequent round: the total number of hits and + // the maximum score were computed on the first round + topDocs.totalHits = scrollContext.totalHits; + topDocs.setMaxScore(scrollContext.maxScore); + } + switch (searchType) { + case QUERY_AND_FETCH: + case DFS_QUERY_AND_FETCH: + // for (DFS_)QUERY_AND_FETCH, we already know the last emitted doc + if (topDocs.scoreDocs.length > 0) { + // set the last emitted doc + scrollContext.lastEmittedDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1]; + } + default: + break; + } + } + return topDocs; } }; } @@ -227,19 +276,7 @@ public class QueryPhase implements SearchPhase { searchContext.queryResult().terminatedEarly(false); } - final TopDocs topDocs = topDocsCallable.call(); - if (searchContext.request().scroll() != null) { - int size = topDocs.scoreDocs.length; - if (size > 0) { - // In the case of *QUERY_AND_FETCH we don't get back to shards telling them which least - // relevant docs got emitted as hit, we can simply mark the last doc as last emitted - if (searchContext.searchType() == SearchType.QUERY_AND_FETCH || - searchContext.searchType() == SearchType.DFS_QUERY_AND_FETCH) { - searchContext.lastEmittedDoc(topDocs.scoreDocs[size - 1]); - } - } - } - searchContext.queryResult().topDocs(topDocs); + searchContext.queryResult().topDocs(topDocsCallable.call()); } catch (Throwable e) { throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e); } diff --git a/core/src/main/java/org/elasticsearch/search/scan/ScanContext.java b/core/src/main/java/org/elasticsearch/search/scan/ScanContext.java index c0018d41020..b09a81bdab9 100644 --- a/core/src/main/java/org/elasticsearch/search/scan/ScanContext.java +++ b/core/src/main/java/org/elasticsearch/search/scan/ScanContext.java @@ -20,18 +20,13 @@ package org.elasticsearch.search.scan; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.MinDocQuery; import org.apache.lucene.search.CollectionTerminatedException; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.Weight; -import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.search.internal.SearchContext; @@ -118,93 +113,4 @@ public class ScanContext { } } - /** - * A filtering query that matches all doc IDs that are not deleted and - * greater than or equal to the configured doc ID. - */ - // pkg-private for testing - static class MinDocQuery extends Query { - - private final int minDoc; - - MinDocQuery(int minDoc) { - this.minDoc = minDoc; - } - - @Override - public int hashCode() { - return 31 * super.hashCode() + minDoc; - } - - @Override - public boolean equals(Object obj) { - if (super.equals(obj) == false) { - return false; - } - MinDocQuery that = (MinDocQuery) obj; - return minDoc == that.minDoc; - } - - @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new ConstantScoreWeight(this) { - @Override - public Scorer scorer(LeafReaderContext context, final Bits acceptDocs) throws IOException { - final int maxDoc = context.reader().maxDoc(); - if (context.docBase + maxDoc <= minDoc) { - return null; - } - final int segmentMinDoc = Math.max(0, minDoc - context.docBase); - final DocIdSetIterator disi = new DocIdSetIterator() { - - int doc = -1; - - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() throws IOException { - return advance(doc + 1); - } - - @Override - public int advance(int target) throws IOException { - assert target > doc; - if (doc == -1) { - // skip directly to minDoc - doc = Math.max(target, segmentMinDoc); - } else { - doc = target; - } - while (doc < maxDoc) { - if (acceptDocs == null || acceptDocs.get(doc)) { - break; - } - doc += 1; - } - if (doc >= maxDoc) { - doc = NO_MORE_DOCS; - } - return doc; - } - - @Override - public long cost() { - return maxDoc - minDoc; - } - - }; - return new ConstantScoreScorer(this, score(), disi); - } - }; - } - - @Override - public String toString(String field) { - return "MinDocQuery(minDoc=" + minDoc + ")"; - } - - } } diff --git a/core/src/test/java/org/apache/lucene/queries/MinDocQueryTests.java b/core/src/test/java/org/apache/lucene/queries/MinDocQueryTests.java new file mode 100644 index 00000000000..725bafbdfd6 --- /dev/null +++ b/core/src/test/java/org/apache/lucene/queries/MinDocQueryTests.java @@ -0,0 +1,61 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.queries; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.store.Directory; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; + +public class MinDocQueryTests extends ESTestCase { + + public void testBasics() { + MinDocQuery query1 = new MinDocQuery(42); + MinDocQuery query2 = new MinDocQuery(42); + MinDocQuery query3 = new MinDocQuery(43); + QueryUtils.check(query1); + QueryUtils.checkEqual(query1, query2); + QueryUtils.checkUnequal(query1, query3); + } + + public void testRandom() throws IOException { + final int numDocs = randomIntBetween(10, 200); + final Document doc = new Document(); + final Directory dir = newDirectory(); + final RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir); + for (int i = 0; i < numDocs; ++i) { + w.addDocument(doc); + } + final IndexReader reader = w.getReader(); + final IndexSearcher searcher = newSearcher(reader); + for (int i = 0; i <= numDocs; ++i) { + assertEquals(numDocs - i, searcher.count(new MinDocQuery(i))); + } + w.close(); + reader.close(); + dir.close(); + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/scan/ScanContextTests.java b/core/src/test/java/org/elasticsearch/search/scan/ScanContextTests.java index e804393e0ed..38c01cb29e1 100644 --- a/core/src/test/java/org/elasticsearch/search/scan/ScanContextTests.java +++ b/core/src/test/java/org/elasticsearch/search/scan/ScanContextTests.java @@ -27,13 +27,11 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryUtils; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; -import org.elasticsearch.search.scan.ScanContext.MinDocQuery; import org.elasticsearch.search.scan.ScanContext.ScanCollector; import org.elasticsearch.test.ESTestCase; @@ -44,33 +42,6 @@ import java.util.List; public class ScanContextTests extends ESTestCase { - public void testMinDocQueryBasics() { - MinDocQuery query1 = new MinDocQuery(42); - MinDocQuery query2 = new MinDocQuery(42); - MinDocQuery query3 = new MinDocQuery(43); - QueryUtils.check(query1); - QueryUtils.checkEqual(query1, query2); - QueryUtils.checkUnequal(query1, query3); - } - - public void testMinDocQueryRandom() throws IOException { - final int numDocs = randomIntBetween(10, 200); - final Document doc = new Document(); - final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir); - for (int i = 0; i < numDocs; ++i) { - w.addDocument(doc); - } - final IndexReader reader = w.getReader(); - final IndexSearcher searcher = newSearcher(reader); - for (int i = 0; i <= numDocs; ++i) { - assertEquals(numDocs - i, searcher.count(new MinDocQuery(i))); - } - w.close(); - reader.close(); - dir.close(); - } - private static TopDocs execute(IndexSearcher searcher, ScanContext ctx, Query query, int pageSize, boolean trackScores) throws IOException { query = ctx.wrapQuery(query); ScanCollector collector = ctx.collector(pageSize, trackScores); diff --git a/core/src/test/java/org/elasticsearch/search/scroll/DuelScrollIT.java b/core/src/test/java/org/elasticsearch/search/scroll/DuelScrollIT.java index d4e354a108b..efa26f25f7d 100644 --- a/core/src/test/java/org/elasticsearch/search/scroll/DuelScrollIT.java +++ b/core/src/test/java/org/elasticsearch/search/scroll/DuelScrollIT.java @@ -21,9 +21,13 @@ package org.elasticsearch.search.scroll; import com.carrotsearch.hppc.IntHashSet; import com.carrotsearch.randomizedtesting.generators.RandomPicks; + import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.sort.SortBuilder; @@ -226,4 +230,83 @@ public class DuelScrollIT extends ESIntegTestCase { } } + private int createIndex(boolean singleShard) throws Exception { + Settings.Builder settings = Settings.builder(); + if (singleShard) { + settings.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1); + } + // no replicas, as they might be ordered differently + settings.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0); + + assertAcked(prepareCreate("test").setSettings(settings.build()).get()); + final int numDocs = randomIntBetween(10, 200); + + IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs]; + for (int i = 0; i < numDocs; ++i) { + builders[i] = client().prepareIndex("test", "type", Integer.toString(i)).setSource("foo", random().nextBoolean()); + } + indexRandom(true, builders); + return numDocs; + } + + private void testDuelIndexOrder(SearchType searchType, boolean trackScores, int numDocs) throws Exception { + final int size = scaledRandomIntBetween(5, numDocs + 5); + final SearchResponse control = client().prepareSearch("test") + .setSearchType(searchType) + .setSize(numDocs) + .setQuery(QueryBuilders.matchQuery("foo", "true")) + .addSort(SortBuilders.fieldSort("_doc")) + .setTrackScores(trackScores) + .get(); + assertNoFailures(control); + + SearchResponse scroll = client().prepareSearch("test") + .setSearchType(searchType) + .setSize(size) + .setQuery(QueryBuilders.matchQuery("foo", "true")) + .addSort(SortBuilders.fieldSort("_doc")) + .setTrackScores(trackScores) + .setScroll("10m").get(); + + int scrollDocs = 0; + try { + while (true) { + assertNoFailures(scroll); + assertEquals(control.getHits().getTotalHits(), scroll.getHits().getTotalHits()); + assertEquals(control.getHits().getMaxScore(), scroll.getHits().getMaxScore(), 0.01f); + if (scroll.getHits().hits().length == 0) { + break; + } + for (int i = 0; i < scroll.getHits().hits().length; ++i) { + SearchHit controlHit = control.getHits().getAt(scrollDocs + i); + SearchHit scrollHit = scroll.getHits().getAt(i); + assertEquals(controlHit.getId(), scrollHit.getId()); + } + scrollDocs += scroll.getHits().hits().length; + scroll = client().prepareSearchScroll(scroll.getScrollId()).setScroll("10m").get(); + } + assertEquals(control.getHits().getTotalHits(), scrollDocs); + } catch (AssertionError e) { + logger.info("Control:\n" + control); + logger.info("Scroll size=" + size + ", from=" + scrollDocs + ":\n" + scroll); + throw e; + } finally { + clearScroll(scroll.getScrollId()); + } + } + + public void testDuelIndexOrderQueryAndFetch() throws Exception { + final SearchType searchType = RandomPicks.randomFrom(random(), Arrays.asList(SearchType.QUERY_AND_FETCH, SearchType.DFS_QUERY_AND_FETCH)); + // QUERY_AND_FETCH only works with a single shard + final int numDocs = createIndex(true); + testDuelIndexOrder(searchType, false, numDocs); + testDuelIndexOrder(searchType, true, numDocs); + } + + public void testDuelIndexOrderQueryThenFetch() throws Exception { + final SearchType searchType = RandomPicks.randomFrom(random(), Arrays.asList(SearchType.QUERY_THEN_FETCH, SearchType.DFS_QUERY_THEN_FETCH)); + final int numDocs = createIndex(false); + testDuelIndexOrder(searchType, false, numDocs); + testDuelIndexOrder(searchType, true, numDocs); + } } diff --git a/core/src/test/java/org/elasticsearch/test/TestSearchContext.java b/core/src/test/java/org/elasticsearch/test/TestSearchContext.java index 2e204afb265..fa8aeed014b 100644 --- a/core/src/test/java/org/elasticsearch/test/TestSearchContext.java +++ b/core/src/test/java/org/elasticsearch/test/TestSearchContext.java @@ -19,7 +19,11 @@ package org.elasticsearch.test; import com.carrotsearch.hppc.ObjectObjectAssociativeContainer; -import org.apache.lucene.search.*; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; import org.apache.lucene.util.Counter; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.cache.recycler.PageCacheRecycler; @@ -41,7 +45,6 @@ import org.elasticsearch.index.query.ParsedQuery; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.script.ScriptService; -import org.elasticsearch.search.Scroll; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.SearchContextAggregations; import org.elasticsearch.search.dfs.DfsSearchResult; @@ -53,6 +56,7 @@ import org.elasticsearch.search.fetch.script.ScriptFieldsContext; import org.elasticsearch.search.fetch.source.FetchSourceContext; import org.elasticsearch.search.highlight.SearchContextHighlight; import org.elasticsearch.search.internal.ContextIndexSearcher; +import org.elasticsearch.search.internal.ScrollContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.ShardSearchRequest; import org.elasticsearch.search.lookup.SearchLookup; @@ -62,7 +66,11 @@ import org.elasticsearch.search.scan.ScanContext; import org.elasticsearch.search.suggest.SuggestionSearchContext; import org.elasticsearch.threadpool.ThreadPool; -import java.util.*; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; public class TestSearchContext extends SearchContext { @@ -185,13 +193,13 @@ public class TestSearchContext extends SearchContext { } @Override - public Scroll scroll() { + public ScrollContext scrollContext() { return null; } @Override - public SearchContext scroll(Scroll scroll) { - return null; + public SearchContext scrollContext(ScrollContext scrollContext) { + throw new UnsupportedOperationException(); } @Override @@ -516,15 +524,6 @@ public class TestSearchContext extends SearchContext { public void keepAlive(long keepAlive) { } - @Override - public void lastEmittedDoc(ScoreDoc doc) { - } - - @Override - public ScoreDoc lastEmittedDoc() { - return null; - } - @Override public SearchLookup lookup() { return new SearchLookup(mapperService(), fieldData(), null);