LUCENE-6145: Make EarlyTerminatingSortingCollector able to early-terminate when the sort order is a prefix of the index-time order.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1648547 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2014-12-30 17:17:27 +00:00
parent b520131e23
commit 04bee476e1
6 changed files with 79 additions and 10 deletions

View File

@ -186,6 +186,9 @@ Optimizations
* LUCENE-6133: Improve default StoredFieldsWriter.merge() to be more efficient.
(Robert Muir)
* LUCENE-6145: Make EarlyTerminatingSortingCollector able to early-terminate
when the sort order is a prefix of the index-time order. (Adrien Grand)
API Changes
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and

View File

@ -186,8 +186,9 @@ public final class SortingMergePolicy extends MergePolicy {
}
/** Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. */
public static boolean isSorted(LeafReader reader, Sort sort) {
/** Returns {@code true} if the given {@code reader} is sorted by the
* {@code sort} order of this {@link SortingMergePolicy}. */
public boolean isSorted(LeafReader reader) {
String description = getSortDescription(reader);
if (description != null && description.equals(sort.toString())) {
return true;
@ -228,6 +229,11 @@ public final class SortingMergePolicy extends MergePolicy {
this.sort = sort;
}
/**
 * Returns the {@link Sort} that this merge policy applies to segments
 * when it merges them.
 */
public Sort getSort() {
  return this.sort;
}
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger,
SegmentInfos segmentInfos, IndexWriter writer) throws IOException {

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexWriter;
@ -66,10 +67,24 @@ import org.apache.lucene.search.TotalHitCountCollector;
*/
public class EarlyTerminatingSortingCollector extends FilterCollector {
/**
 * Tells whether a search sorted by {@code sort} may stop collecting early on
 * segments merged by {@code mergePolicy}. This is the case when the fields of
 * {@code sort} form a leading prefix of the merge policy's sort fields.
 */
public static boolean canEarlyTerminate(Sort sort, SortingMergePolicy mergePolicy) {
  final SortField[] queryFields = sort.getSort();
  final SortField[] indexFields = mergePolicy.getSort().getSort();
  final int prefixLength = queryFields.length;
  if (prefixLength <= indexFields.length) {
    // compare the search-time fields with the first prefixLength index-time fields
    return Arrays.equals(queryFields, Arrays.copyOf(indexFields, prefixLength));
  }
  // more search-time fields than index-time fields: cannot be a prefix
  return false;
}
/** Sort used to sort the search results */
protected final Sort sort;
/** Number of documents to collect in each segment */
protected final int numDocsToCollect;
private final SortingMergePolicy mergePolicy;
/**
* Create a new {@link EarlyTerminatingSortingCollector} instance.
@ -82,19 +97,25 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
* the number of documents to collect on each segment. When wrapping
* a {@link TopDocsCollector}, this number should be the number of
* hits.
* @throws IllegalArgumentException if the sort order doesn't allow for early
* termination with the given merge policy.
*/
// NOTE(review): diff view — the first signature below appears to be the previous one and the
// second its replacement, which adds the mergePolicy argument.
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect) {
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, SortingMergePolicy mergePolicy) {
super(in);
// Reject non-positive collection limits up front.
if (numDocsToCollect <= 0) {
// NOTE(review): presumably an old/new pair — the exception type for this check changes from
// IllegalStateException to IllegalArgumentException.
throw new IllegalStateException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
}
// The search sort must be compatible with the merge policy's sort, as decided by canEarlyTerminate.
// NOTE(review): the constructor Javadoc advertises IllegalArgumentException for this case, but
// IllegalStateException is thrown here — confirm which one is intended.
if (canEarlyTerminate(sort, mergePolicy) == false) {
throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicy.getSort());
}
this.sort = sort;
this.numDocsToCollect = numDocsToCollect;
// Kept so that getLeafCollector can ask the policy whether a segment is sorted.
this.mergePolicy = mergePolicy;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
if (SortingMergePolicy.isSorted(context.reader(), sort)) {
if (mergePolicy.isSorted(context.reader())) {
// segment is sorted, can early-terminate
return new FilterLeafCollector(super.getLeafCollector(context)) {
private int numCollected;

View File

@ -70,7 +70,7 @@ public class TestSortingMergePolicy extends LuceneTestCase {
return doc;
}
public static MergePolicy newSortingMergePolicy(Sort sort) {
public static SortingMergePolicy newSortingMergePolicy(Sort sort) {
// usually create a MP with a low merge factor so that many merges happen
MergePolicy mp;
int thingToDo = random().nextInt(3);

View File

@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SortingMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TestSortingMergePolicy;
import org.apache.lucene.search.LeafCollector;
@ -59,6 +60,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
private Sort sort;
private RandomIndexWriter iw;
private IndexReader reader;
private SortingMergePolicy mergePolicy;
@Override
public void setUp() throws Exception {
@ -86,7 +88,8 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
final long seed = random().nextLong();
final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
iwc.setMergePolicy(TestSortingMergePolicy.newSortingMergePolicy(sort));
mergePolicy = TestSortingMergePolicy.newSortingMergePolicy(sort);
iwc.setMergePolicy(mergePolicy);
iw = new RandomIndexWriter(new Random(seed), dir, iwc);
iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
for (int i = 0; i < numDocs; ++i) {
@ -134,7 +137,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
query = new MatchAllDocsQuery();
}
searcher.search(query, collector1);
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy));
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
}
@ -142,6 +145,40 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
}
}
/** Checks the prefix rule of canEarlyTerminate on a handful of search/merge sort pairs. */
public void testCanEarlyTerminate() {
  Sort querySort;
  Sort mergeSort;

  // same single field on both sides
  querySort = new Sort(new SortField("a", SortField.Type.LONG));
  mergeSort = new Sort(new SortField("a", SortField.Type.LONG));
  assertTrue(canEarlyTerminate(querySort, mergeSort));

  // same two fields on both sides
  querySort = new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING));
  mergeSort = new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING));
  assertTrue(canEarlyTerminate(querySort, mergeSort));

  // search sort is a strict prefix of the merge sort
  querySort = new Sort(new SortField("a", SortField.Type.LONG));
  mergeSort = new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING));
  assertTrue(canEarlyTerminate(querySort, mergeSort));

  // same field, but the reverse flag differs
  querySort = new Sort(new SortField("a", SortField.Type.LONG, true));
  mergeSort = new Sort(new SortField("a", SortField.Type.LONG, false));
  assertFalse(canEarlyTerminate(querySort, mergeSort));

  // search sort has more fields than the merge sort
  querySort = new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING));
  mergeSort = new Sort(new SortField("a", SortField.Type.LONG));
  assertFalse(canEarlyTerminate(querySort, mergeSort));

  // second field differs
  querySort = new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING));
  mergeSort = new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING));
  assertFalse(canEarlyTerminate(querySort, mergeSort));

  // first field differs
  querySort = new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING));
  mergeSort = new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING));
  assertFalse(canEarlyTerminate(querySort, mergeSort));
}
/**
 * Wraps {@code mergeSort} in a new {@link SortingMergePolicy} and asks
 * {@link EarlyTerminatingSortingCollector#canEarlyTerminate} whether
 * {@code querySort} can early-terminate against it.
 */
private boolean canEarlyTerminate(Sort querySort, Sort mergeSort) {
  final SortingMergePolicy policy = new SortingMergePolicy(newMergePolicy(), mergeSort);
  return EarlyTerminatingSortingCollector.canEarlyTerminate(querySort, policy);
}
public void testEarlyTerminationDifferentSorter() throws IOException {
createRandomIndex();
final int iters = atLeast(3);
@ -166,7 +203,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
}
searcher.search(query, collector1);
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits) {
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, new SortingMergePolicy(newMergePolicy(), different)) {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final LeafCollector ret = super.getLeafCollector(context);

View File

@ -51,6 +51,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SegmentReader;
@ -511,7 +512,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
// We sorted postings by weight during indexing, so we
// only retrieve the first num hits now:
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
final MergePolicy mergePolicy = writer.getConfig().getMergePolicy();
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, (SortingMergePolicy) mergePolicy);
IndexSearcher searcher = searcherMgr.acquire();
List<LookupResult> results = null;
try {