LUCENE-6646: Make EarlyTerminatingCollector SortingMergePolicy-free.

Close #175
Close #178


git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688894 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-07-02 19:33:53 +00:00
parent 14d1dcaf0f
commit f6ff2431bd
5 changed files with 36 additions and 33 deletions

View File

@ -178,6 +178,9 @@ API Changes
object to reduce the scope of doc IDs that may be returned, emphasizing the object to reduce the scope of doc IDs that may be returned, emphasizing the
fact that these objects need to support random-access. (Adrien Grand) fact that these objects need to support random-access. (Adrien Grand)
* LUCENE-6646: Make EarlyTerminatingSortingCollector take a Sort object directly
instead of a SortingMergePolicy. (Christine Poerschke via Adrien Grand)
Bug fixes Bug fixes
* LUCENE-6500: ParallelCompositeReader did not always call * LUCENE-6500: ParallelCompositeReader did not always call

View File

@ -187,8 +187,9 @@ public final class SortingMergePolicy extends MergePolicy {
} }
/** Returns {@code true} if the given {@code reader} is sorted by the /** Returns {@code true} if the given {@code reader} is sorted by the
* {@code sort} order of this {@link SortingMergePolicy}. */ * {@code sort} given. Typically the given {@code sort} would be the
public boolean isSorted(LeafReader reader) { * {@link SortingMergePolicy#getSort()} order of a {@link SortingMergePolicy}. */
public static boolean isSorted(LeafReader reader, Sort sort) {
String description = getSortDescription(reader); String description = getSortDescription(reader);
if (description != null && description.equals(sort.toString())) { if (description != null && description.equals(sort.toString())) {
return true; return true;

View File

@ -38,11 +38,12 @@ import org.apache.lucene.search.TotalHitCountCollector;
* {@link Sort}. * {@link Sort}.
* *
* <p> * <p>
* <b>NOTE:</b> the {@code Collector} detects sorted segments according to * <b>NOTE:</b> the {@code Collector} detects segments sorted according to a
* {@link SortingMergePolicy}, so it's best used in conjunction with it. Also, * {@link SortingMergePolicy}'s {@link Sort} and so it's best used in conjunction
* it collects up to a specified {@code numDocsToCollect} from each segment, * with a {@link SortingMergePolicy}. Also, it collects up to a specified
* and therefore is mostly suitable for use in conjunction with collectors such as * {@code numDocsToCollect} from each segment, and therefore is mostly suitable
* {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}. * for use in conjunction with collectors such as {@link TopDocsCollector}, and
* not e.g. {@link TotalHitCountCollector}.
* <p> * <p>
* <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same * <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
* order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs} * order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
@ -69,10 +70,10 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
/** Returns whether collection can be early-terminated if it sorts with the /** Returns whether collection can be early-terminated if it sorts with the
* provided {@link Sort} and if segments are merged with the provided * provided {@link Sort} and if segments are merged with the provided
* {@link SortingMergePolicy}. */ * {@link Sort}. */
public static boolean canEarlyTerminate(Sort sort, SortingMergePolicy mergePolicy) { public static boolean canEarlyTerminate(Sort searchSort, Sort mergePolicySort) {
final SortField[] fields1 = sort.getSort(); final SortField[] fields1 = searchSort.getSort();
final SortField[] fields2 = mergePolicy.getSort().getSort(); final SortField[] fields2 = mergePolicySort.getSort();
// early termination is possible if fields1 is a prefix of fields2 // early termination is possible if fields1 is a prefix of fields2
if (fields1.length > fields2.length) { if (fields1.length > fields2.length) {
return false; return false;
@ -84,7 +85,7 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
protected final Sort sort; protected final Sort sort;
/** Number of documents to collect in each segment */ /** Number of documents to collect in each segment */
protected final int numDocsToCollect; protected final int numDocsToCollect;
private final SortingMergePolicy mergePolicy; private final Sort mergePolicySort;
/** /**
* Create a new {@link EarlyTerminatingSortingCollector} instance. * Create a new {@link EarlyTerminatingSortingCollector} instance.
@ -97,25 +98,27 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
* the number of documents to collect on each segment. When wrapping * the number of documents to collect on each segment. When wrapping
* a {@link TopDocsCollector}, this number should be the number of * a {@link TopDocsCollector}, this number should be the number of
* hits. * hits.
* @param mergePolicySort
* the sort your {@link SortingMergePolicy} uses
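* @throws IllegalArgumentException if {@code numDocsToCollect} is not positive * @throws IllegalArgumentException if {@code numDocsToCollect} is not positive
* @throws IllegalStateException if the sort order doesn't allow for early * @throws IllegalStateException if the sort order doesn't allow for early
* termination with the given merge policy. * termination with the given merge policy sort.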
*/ */
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, SortingMergePolicy mergePolicy) { public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, Sort mergePolicySort) {
super(in); super(in);
if (numDocsToCollect <= 0) { if (numDocsToCollect <= 0) {
throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect); throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
} }
if (canEarlyTerminate(sort, mergePolicy) == false) { if (canEarlyTerminate(sort, mergePolicySort) == false) {
throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicy.getSort()); throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicySort);
} }
this.sort = sort; this.sort = sort;
this.numDocsToCollect = numDocsToCollect; this.numDocsToCollect = numDocsToCollect;
this.mergePolicy = mergePolicy; this.mergePolicySort = mergePolicySort;
} }
@Override @Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
if (mergePolicy.isSorted(context.reader())) { if (SortingMergePolicy.isSorted(context.reader(), mergePolicySort)) {
// segment is sorted, can early-terminate // segment is sorted, can early-terminate
return new FilterLeafCollector(super.getLeafCollector(context)) { return new FilterLeafCollector(super.getLeafCollector(context)) {
private int numCollected; private int numCollected;

View File

@ -136,7 +136,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
query = new MatchAllDocsQuery(); query = new MatchAllDocsQuery();
} }
searcher.search(query, collector1); searcher.search(query, collector1);
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy)); searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy.getSort()));
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits()); assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs); assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
} }
@ -145,39 +145,35 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
} }
public void testCanEarlyTerminate() { public void testCanEarlyTerminate() {
assertTrue(canEarlyTerminate( assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG)), new Sort(new SortField("a", SortField.Type.LONG)),
new Sort(new SortField("a", SortField.Type.LONG)))); new Sort(new SortField("a", SortField.Type.LONG))));
assertTrue(canEarlyTerminate( assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
assertTrue(canEarlyTerminate( assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG)), new Sort(new SortField("a", SortField.Type.LONG)),
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
assertFalse(canEarlyTerminate( assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG, true)), new Sort(new SortField("a", SortField.Type.LONG, true)),
new Sort(new SortField("a", SortField.Type.LONG, false)))); new Sort(new SortField("a", SortField.Type.LONG, false))));
assertFalse(canEarlyTerminate( assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
new Sort(new SortField("a", SortField.Type.LONG)))); new Sort(new SortField("a", SortField.Type.LONG))));
assertFalse(canEarlyTerminate( assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING)))); new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
assertFalse(canEarlyTerminate( assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
} }
private boolean canEarlyTerminate(Sort querySort, Sort mergeSort) {
return EarlyTerminatingSortingCollector.canEarlyTerminate(querySort, new SortingMergePolicy(newMergePolicy(), mergeSort));
}
public void testEarlyTerminationDifferentSorter() throws IOException { public void testEarlyTerminationDifferentSorter() throws IOException {
createRandomIndex(); createRandomIndex();
final int iters = atLeast(3); final int iters = atLeast(3);
@ -201,7 +197,8 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
} }
searcher.search(query, collector1); searcher.search(query, collector1);
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG)); Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, new SortingMergePolicy(newMergePolicy(), different)) {
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, different) {
@Override @Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final LeafCollector ret = super.getLeafCollector(context); final LeafCollector ret = super.getLeafCollector(context);

View File

@ -52,7 +52,6 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.SegmentReader;
@ -587,8 +586,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
// We sorted postings by weight during indexing, so we // We sorted postings by weight during indexing, so we
// only retrieve the first num hits now: // only retrieve the first num hits now:
final MergePolicy mergePolicy = writer.getConfig().getMergePolicy(); final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) writer.getConfig().getMergePolicy();
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, (SortingMergePolicy) mergePolicy); Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, sortingMergePolicy.getSort());
IndexSearcher searcher = searcherMgr.acquire(); IndexSearcher searcher = searcherMgr.acquire();
List<LookupResult> results = null; List<LookupResult> results = null;
try { try {