mirror of https://github.com/apache/lucene.git
LUCENE-6646: Make EarlyTerminatingCollector SortingMergePolicy-free.
Close #175 Close #178 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1688894 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
14d1dcaf0f
commit
f6ff2431bd
|
@ -178,6 +178,9 @@ API Changes
|
|||
object to reduce the scope of doc IDs that may be returned, emphasizing the
|
||||
fact that these objects need to support random-access. (Adrien Grand)
|
||||
|
||||
* LUCENE-6646: Make EarlyTerminatingCollector take a Sort object directly
|
||||
instead of a SortingMergePolicy. (Christine Poerschke via Adrien Grand)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-6500: ParallelCompositeReader did not always call
|
||||
|
|
|
@ -187,8 +187,9 @@ public final class SortingMergePolicy extends MergePolicy {
|
|||
}
|
||||
|
||||
/** Returns {@code true} if the given {@code reader} is sorted by the
|
||||
* {@code sort} order of this {@link SortingMergePolicy}. */
|
||||
public boolean isSorted(LeafReader reader) {
|
||||
* {@code sort} given. Typically the given {@code sort} would be the
|
||||
* {@link SortingMergePolicy#getSort()} order of a {@link SortingMergePolicy}. */
|
||||
public static boolean isSorted(LeafReader reader, Sort sort) {
|
||||
String description = getSortDescription(reader);
|
||||
if (description != null && description.equals(sort.toString())) {
|
||||
return true;
|
||||
|
|
|
@ -38,11 +38,12 @@ import org.apache.lucene.search.TotalHitCountCollector;
|
|||
* {@link Sort}.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> the {@code Collector} detects sorted segments according to
|
||||
* {@link SortingMergePolicy}, so it's best used in conjunction with it. Also,
|
||||
* it collects up to a specified {@code numDocsToCollect} from each segment,
|
||||
* and therefore is mostly suitable for use in conjunction with collectors such as
|
||||
* {@link TopDocsCollector}, and not e.g. {@link TotalHitCountCollector}.
|
||||
* <b>NOTE:</b> the {@code Collector} detects segments sorted according to a
|
||||
* {@link SortingMergePolicy}'s {@link Sort} and so it's best used in conjunction
|
||||
* with a {@link SortingMergePolicy}. Also,it collects up to a specified
|
||||
* {@code numDocsToCollect} from each segment, and therefore is mostly suitable
|
||||
* for use in conjunction with collectors such as {@link TopDocsCollector}, and
|
||||
* not e.g. {@link TotalHitCountCollector}.
|
||||
* <p>
|
||||
* <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
|
||||
* order as the index order, the returned {@link TopDocsCollector#topDocs() TopDocs}
|
||||
|
@ -69,10 +70,10 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
|
|||
|
||||
/** Returns whether collection can be early-terminated if it sorts with the
|
||||
* provided {@link Sort} and if segments are merged with the provided
|
||||
* {@link SortingMergePolicy}. */
|
||||
public static boolean canEarlyTerminate(Sort sort, SortingMergePolicy mergePolicy) {
|
||||
final SortField[] fields1 = sort.getSort();
|
||||
final SortField[] fields2 = mergePolicy.getSort().getSort();
|
||||
* {@link Sort}. */
|
||||
public static boolean canEarlyTerminate(Sort searchSort, Sort mergePolicySort) {
|
||||
final SortField[] fields1 = searchSort.getSort();
|
||||
final SortField[] fields2 = mergePolicySort.getSort();
|
||||
// early termination is possible if fields1 is a prefix of fields2
|
||||
if (fields1.length > fields2.length) {
|
||||
return false;
|
||||
|
@ -84,7 +85,7 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
|
|||
protected final Sort sort;
|
||||
/** Number of documents to collect in each segment */
|
||||
protected final int numDocsToCollect;
|
||||
private final SortingMergePolicy mergePolicy;
|
||||
private final Sort mergePolicySort;
|
||||
|
||||
/**
|
||||
* Create a new {@link EarlyTerminatingSortingCollector} instance.
|
||||
|
@ -97,25 +98,27 @@ public class EarlyTerminatingSortingCollector extends FilterCollector {
|
|||
* the number of documents to collect on each segment. When wrapping
|
||||
* a {@link TopDocsCollector}, this number should be the number of
|
||||
* hits.
|
||||
* @param mergePolicySort
|
||||
* the sort your {@link SortingMergePolicy} uses
|
||||
* @throws IllegalArgumentException if the sort order doesn't allow for early
|
||||
* termination with the given merge policy.
|
||||
*/
|
||||
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, SortingMergePolicy mergePolicy) {
|
||||
public EarlyTerminatingSortingCollector(Collector in, Sort sort, int numDocsToCollect, Sort mergePolicySort) {
|
||||
super(in);
|
||||
if (numDocsToCollect <= 0) {
|
||||
throw new IllegalArgumentException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
|
||||
}
|
||||
if (canEarlyTerminate(sort, mergePolicy) == false) {
|
||||
throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicy.getSort());
|
||||
if (canEarlyTerminate(sort, mergePolicySort) == false) {
|
||||
throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + mergePolicySort);
|
||||
}
|
||||
this.sort = sort;
|
||||
this.numDocsToCollect = numDocsToCollect;
|
||||
this.mergePolicy = mergePolicy;
|
||||
this.mergePolicySort = mergePolicySort;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
if (mergePolicy.isSorted(context.reader())) {
|
||||
if (SortingMergePolicy.isSorted(context.reader(), mergePolicySort)) {
|
||||
// segment is sorted, can early-terminate
|
||||
return new FilterLeafCollector(super.getLeafCollector(context)) {
|
||||
private int numCollected;
|
||||
|
|
|
@ -136,7 +136,7 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
query = new MatchAllDocsQuery();
|
||||
}
|
||||
searcher.search(query, collector1);
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy));
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits, mergePolicy.getSort()));
|
||||
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
|
||||
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||
}
|
||||
|
@ -145,39 +145,35 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testCanEarlyTerminate() {
|
||||
assertTrue(canEarlyTerminate(
|
||||
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG))));
|
||||
|
||||
assertTrue(canEarlyTerminate(
|
||||
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
|
||||
assertTrue(canEarlyTerminate(
|
||||
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
|
||||
assertFalse(canEarlyTerminate(
|
||||
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG, true)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG, false))));
|
||||
|
||||
assertFalse(canEarlyTerminate(
|
||||
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG))));
|
||||
|
||||
assertFalse(canEarlyTerminate(
|
||||
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
|
||||
|
||||
assertFalse(canEarlyTerminate(
|
||||
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
}
|
||||
|
||||
private boolean canEarlyTerminate(Sort querySort, Sort mergeSort) {
|
||||
return EarlyTerminatingSortingCollector.canEarlyTerminate(querySort, new SortingMergePolicy(newMergePolicy(), mergeSort));
|
||||
}
|
||||
|
||||
public void testEarlyTerminationDifferentSorter() throws IOException {
|
||||
createRandomIndex();
|
||||
final int iters = atLeast(3);
|
||||
|
@ -201,7 +197,8 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
}
|
||||
searcher.search(query, collector1);
|
||||
Sort different = new Sort(new SortField("ndv2", SortField.Type.LONG));
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, new SortingMergePolicy(newMergePolicy(), different)) {
|
||||
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, different, numHits, different) {
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
final LeafCollector ret = super.getLeafCollector(context);
|
||||
|
|
|
@ -52,7 +52,6 @@ import org.apache.lucene.index.IndexWriter;
|
|||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MergePolicy;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
|
@ -587,8 +586,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
|
||||
// We sorted postings by weight during indexing, so we
|
||||
// only retrieve the first num hits now:
|
||||
final MergePolicy mergePolicy = writer.getConfig().getMergePolicy();
|
||||
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, (SortingMergePolicy) mergePolicy);
|
||||
final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) writer.getConfig().getMergePolicy();
|
||||
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num, sortingMergePolicy.getSort());
|
||||
IndexSearcher searcher = searcherMgr.acquire();
|
||||
List<LookupResult> results = null;
|
||||
try {
|
||||
|
|
Loading…
Reference in New Issue