mirror of https://github.com/apache/lucene.git
LUCENE-6145: Make EarlyTerminatingSortingCollector able to early-terminate when the sort order is a prefix of the index-time order.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1648547 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b520131e23
commit
04bee476e1
|
@ -186,6 +186,9 @@ Optimizations
|
|||
* LUCENE-6133: Improve default StoredFieldsWriter.merge() to be more efficient.
|
||||
(Robert Muir)
|
||||
|
||||
* LUCENE-6145: Make EarlyTerminatingSortingCollector able to early-terminate
|
||||
when the sort order is a prefix of the index-time order. (Adrien Grand)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
|
||||
|
|
|
@ -186,8 +186,9 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
|
||||
}
|
||||
|
||||
/** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
|
||||
public static boolean isSorted(LeafReader reader, Sort sort) {
|
||||
/** Returns {@code true} if the given {@code reader} is sorted by the
|
||||
* {@code sort} order of this {@link SortingMergePolicy}. */
|
||||
public boolean isSorted(LeafReader reader) {
|
||||
String description = getSortDescription(reader);
|
||||
if (description != null && description.equals(sort.toString())) {
|
||||
return true;
|
||||
|
@ -228,6 +229,11 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
this.sort = sort;
|
||||
}
|
||||
|
||||
/** Return the {@link Sort} order that is used to sort segments when merging. */
|
||||
public Sort getSort() {
|
||||
return sort;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MergeSpecification findMerges(MergeTrigger mergeTrigger,
|
||||
SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -66,10 +67,24 @@ import org.apache.lucene.search.TotalHitCountCollector;
|
|||
*/
|
||||
public class EarlyTerminatingSortingCollector extends FilterCollector {
|
||||
|
||||
/** Returns whether collection can be early-terminated if it sorts with the
|
||||
* provided {@link Sort} and if segments are merged with the provided
|
||||
* {@link SortingMergePolicy}. */
|
||||
public static boolean canEarlyTerminate(Sort sort, SortingMergePolicy mergePolicy) {
|
||||
final SortField[] fields1 = sort.getSort();
|
||||
final SortField[] fields2 = mergePolicy.getSort().getSort();
|
||||
// early termination is possible if fields1 is a prefix of fields2
|
||||
if (fields1.length > fields2.length) {
|
||||
return false;
|
||||
}
|
||||
return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length));
|
||||
}
|
||||
|
||||
/** Sort used to sort the search results */
|
||||
protected final Sort sort;
|
||||
/** Number of documents to collect in each segment */
|
||||
protected final int numDocsToCollect;
|
||||
private final SortingMergePolicy mergePolicy;
|
||||
|
||||
/**
|
||||
* Create a new {@link EarlyTerminatingSortingCollector} instance.
|
||||
|
@ -82,19 +97,25 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
|
|||
* the number of documents to collect on each segment. When wrapping
|
||||
* a {@link TopDocsCollector}, this number should be the number of
|
||||
* hits.
|
||||
* @throws IllegalArgumentException if the sort order doesn't allow for early
|
||||
* termination with the given merge policy.
|
||||
*/
|
||||
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
|
||||
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, SortingMergePolicy mergePolicy) {
|
||||
super(in);
|
||||
if (numDocsToCollect <= 0) {
|
||||
throw new IllegalStateException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
|
||||
throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
|
||||
}
|
||||
if (canEarlyTerminate(sort, mergePolicy) == false) {
|
||||
throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicy.getSort());
|
||||
}
|
||||
this.sort = sort;
|
||||
this.numDocsToCollect = numDocsToCollect;
|
||||
this.mergePolicy = mergePolicy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
if (SortingMergePolicy.isSorted(context.reader(), sort)) {
|
||||
if (mergePolicy.isSorted(context.reader())) {
|
||||
// segment is sorted, can early-terminate
|
||||
return new FilterLeafCollector(super.getLeafCollector(context)) {
|
||||
private int numCollected;
|
||||
|
|
|
@ -70,7 +70,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
|
|||
return doc;
|
||||
}
|
||||
|
||||
public static MergePolicy newSortingMergePolicy(Sort sort) {
|
||||
public static SortingMergePolicy newSortingMergePolicy(Sort sort) {
|
||||
// usually create a MP with a low merge factor so that many merges happen
|
||||
MergePolicy mp;
|
||||
int thingToDo = random().nextInt(3);
|
||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SerialMergeScheduler;
|
||||
import org.apache.lucene.index.SortingMergePolicy;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TestSortingMergePolicy;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
|
@ -59,6 +60,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
private Sort sort;
|
||||
private RandomIndexWriter iw;
|
||||
private IndexReader reader;
|
||||
private SortingMergePolicy mergePolicy;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
|
@ -86,7 +88,8 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
final long seed = random().nextLong();
|
||||
final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
|
||||
iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
|
||||
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sort));
|
||||
mergePolicy = TestSortingMergePolicy.newSortingMergePolicy(sort);
|
||||
iwc.setMergePolicy(mergePolicy);
|
||||
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
|
||||
iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
|
@ -134,7 +137,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
query = new MatchAllDocsQuery();
|
||||
}
|
||||
searcher.search(query, collector1);
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy));
|
||||
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
|
||||
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||
}
|
||||
|
@ -142,6 +145,40 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testCanEarlyTerminate() {
|
||||
assertTrue(canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG))));
|
||||
|
||||
assertTrue(canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
|
||||
assertTrue(canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
|
||||
assertFalse(canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG, true)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG, false))));
|
||||
|
||||
assertFalse(canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG))));
|
||||
|
||||
assertFalse(canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
|
||||
|
||||
assertFalse(canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
}
|
||||
|
||||
private boolean canEarlyTerminate(Sort querySort, Sort mergeSort) {
|
||||
return EarlyTerminatingSortingCollector.canEarlyTerminate(querySort, new SortingMergePolicy(newMergePolicy(), mergeSort));
|
||||
}
|
||||
|
||||
public void testEarlyTerminationDifferentSorter() throws IOException {
|
||||
createRandomIndex();
|
||||
final int iters = atLeast(3);
|
||||
|
@ -166,7 +203,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
}
|
||||
searcher.search(query, collector1);
|
||||
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits) {
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, new SortingMergePolicy(newMergePolicy(), different)) {
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
final LeafCollector ret = super.getLeafCollector(context);
|
||||
|
|
|
@ -51,6 +51,7 @@ import org.apache.lucene.index.IndexWriter;
|
|||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MergePolicy;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
|
@ -511,7 +512,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
|
||||
// We sorted postings by weight during indexing, so we
|
||||
// only retrieve the first num hits now:
|
||||
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
|
||||
final MergePolicy mergePolicy = writer.getConfig().getMergePolicy();
|
||||
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, (SortingMergePolicy) mergePolicy);
|
||||
IndexSearcher searcher = searcherMgr.acquire();
|
||||
List<LookupResult> results = null;
|
||||
try {
|
||||
|
|
Loading…
Reference in New Issue