LUCENE-6646: Make EarlyTerminatingCollector SortingMergePolicy-free.

Close #175
Close #178


git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688894 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-07-02 19:33:53 +00:00
parent 14d1dcaf0f
commit f6ff2431bd
5 changed files with 36 additions and 33 deletions

View File

@ -178,6 +178,9 @@ API Changes
object to reduce the scope of doc IDs that may be returned, emphasizing the
fact that these objects need to support random-access. (Adrien Grand)
* LUCENE-6646: Make EarlyTerminatingCollector take a Sort object directly
instead of a SortingMergePolicy. (Christine Poerschke via Adrien Grand)
Bug fixes
* LUCENE-6500: ParallelCompositeReader did not always call

View File

@ -187,8 +187,9 @@ public final class SortingMergePolicy extends MergePolicy {
}
/** Returns {@code true} if the given {@code reader} is sorted by the
* {@code sort} order of this {@link SortingMergePolicy}. */
public boolean isSorted(LeafReader reader) {
* {@code sort} given. Typically the given {@code sort} would be the
* {@link SortingMergePolicy#getSort()} order of a {@link SortingMergePolicy}. */
public static boolean isSorted(LeafReader reader, Sort sort) {
String description = getSortDescription(reader);
if (description != null && description.equals(sort.toString())) {
return true;

View File

@ -38,11 +38,12 @@ import org.apache.lucene.search.TotalHitCountCollector;
* {@link Sort}.
*
* <p>
* <b>NOTE:</b> the {@code Collector} detects sorted segments according to
* {@link SortingMergePolicy}, so it's best used in conjunction with it. Also,
* it collects up to a specified {@code numDocsToCollect} from each segment,
* and therefore is mostly suitable for use in conjunction with collectors such as
* {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}.
* <b>NOTE:</b> the {@code Collector} detects segments sorted according to a
* {@link SortingMergePolicy}'s {@link Sort} and so it's best used in conjunction
* with a {@link SortingMergePolicy}. Also,it collects up to a specified
* {@code numDocsToCollect} from each segment, and therefore is mostly suitable
* for use in conjunction with collectors such as {@link TopDocsCollector}, and
* not e.g. {@link TotalHitCountCollector}.
* <p>
* <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
* order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
@ -69,10 +70,10 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
/** Returns whether collection can be early-terminated if it sorts with the
* provided {@link Sort} and if segments are merged with the provided
* {@link SortingMergePolicy}. */
public static boolean canEarlyTerminate(Sort sort, SortingMergePolicy mergePolicy) {
final SortField[] fields1 = sort.getSort();
final SortField[] fields2 = mergePolicy.getSort().getSort();
* {@link Sort}. */
public static boolean canEarlyTerminate(Sort searchSort, Sort mergePolicySort) {
final SortField[] fields1 = searchSort.getSort();
final SortField[] fields2 = mergePolicySort.getSort();
// early termination is possible if fields1 is a prefix of fields2
if (fields1.length > fields2.length) {
return false;
@ -84,7 +85,7 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
protected final Sort sort;
/** Number of documents to collect in each segment */
protected final int numDocsToCollect;
private final SortingMergePolicy mergePolicy;
private final Sort mergePolicySort;
/**
* Create a new {@link EarlyTerminatingSortingCollector} instance.
@ -97,25 +98,27 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
* the number of documents to collect on each segment. When wrapping
* a {@link TopDocsCollector}, this number should be the number of
* hits.
* @param mergePolicySort
* the sort your {@link SortingMergePolicy} uses
* @throws IllegalArgumentException if the sort order doesn't allow for early
* termination with the given merge policy.
*/
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, SortingMergePolicy mergePolicy) {
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, Sort mergePolicySort) {
super(in);
if (numDocsToCollect <= 0) {
throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
}
if (canEarlyTerminate(sort, mergePolicy) == false) {
throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicy.getSort());
if (canEarlyTerminate(sort, mergePolicySort) == false) {
throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicySort);
}
this.sort = sort;
this.numDocsToCollect = numDocsToCollect;
this.mergePolicy = mergePolicy;
this.mergePolicySort = mergePolicySort;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
if (mergePolicy.isSorted(context.reader())) {
if (SortingMergePolicy.isSorted(context.reader(), mergePolicySort)) {
// segment is sorted, can early-terminate
return new FilterLeafCollector(super.getLeafCollector(context)) {
private int numCollected;

View File

@ -136,7 +136,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
query = new MatchAllDocsQuery();
}
searcher.search(query, collector1);
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy.getSort()));
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
}
@ -145,39 +145,35 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
}
public void testCanEarlyTerminate() {
assertTrue(canEarlyTerminate(
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG)),
new Sort(new SortField("a", SortField.Type.LONG))));
assertTrue(canEarlyTerminate(
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
assertTrue(canEarlyTerminate(
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG)),
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
assertFalse(canEarlyTerminate(
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG, true)),
new Sort(new SortField("a", SortField.Type.LONG, false))));
assertFalse(canEarlyTerminate(
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
new Sort(new SortField("a", SortField.Type.LONG))));
assertFalse(canEarlyTerminate(
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
assertFalse(canEarlyTerminate(
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
}
private boolean canEarlyTerminate(Sort querySort, Sort mergeSort) {
return EarlyTerminatingSortingCollector.canEarlyTerminate(querySort, new SortingMergePolicy(newMergePolicy(), mergeSort));
}
public void testEarlyTerminationDifferentSorter() throws IOException {
createRandomIndex();
final int iters = atLeast(3);
@ -201,7 +197,8 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
}
searcher.search(query, collector1);
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, new SortingMergePolicy(newMergePolicy(), different)) {
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, different) {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final LeafCollector ret = super.getLeafCollector(context);

View File

@ -52,7 +52,6 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SegmentReader;
@ -587,8 +586,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
// We sorted postings by weight during indexing, so we
// only retrieve the first num hits now:
final MergePolicy mergePolicy = writer.getConfig().getMergePolicy();
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, (SortingMergePolicy) mergePolicy);
final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) writer.getConfig().getMergePolicy();
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, sortingMergePolicy.getSort());
IndexSearcher searcher = searcherMgr.acquire();
List<LookupResult> results = null;
try {