mirror of https://github.com/apache/lucene.git
LUCENE-8059: Fold early termination support into TopFieldCollector.
This commit is contained in:
parent
d27ddcb409
commit
812711416d
|
@ -73,6 +73,10 @@ API Changes
|
|||
* LUCENE-8049: IndexWriter.getMergingSegments()'s return type was changed from
|
||||
Collection to Set to more accurately reflect it's nature. (David Smiley)
|
||||
|
||||
* LUCENE-8059: TopFieldDocCollector can now early terminate collection when
|
||||
the sort order is compatible with the index order. As a consequence,
|
||||
EarlyTerminatingSortingCollector is now deprecated. (Adrien Grand)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-8061: Add convenience factory methods to create BBoxes and XYZSolids
|
||||
|
|
|
@ -113,7 +113,8 @@ public abstract class ReadTask extends PerfTask {
|
|||
// pulling the Weight ourselves:
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
|
||||
true, withScore(),
|
||||
withMaxScore());
|
||||
withMaxScore(),
|
||||
withTotalHits());
|
||||
searcher.search(q, collector);
|
||||
hits = collector.topDocs();
|
||||
} else {
|
||||
|
@ -220,6 +221,12 @@ public abstract class ReadTask extends PerfTask {
|
|||
return true;
|
||||
}
|
||||
|
||||
/** Whether totalHits should be computed (only useful with
|
||||
* field sort) */
|
||||
public boolean withTotalHits() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
|
||||
* of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
|
||||
|
|
|
@ -535,7 +535,8 @@ public class IndexSearcher {
|
|||
@Override
|
||||
public TopFieldCollector newCollector() throws IOException {
|
||||
final boolean fillFields = true;
|
||||
return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, fillFields, doDocScores, doMaxScore);
|
||||
// TODO: don't pay the price for accurate hit counts by default
|
||||
return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, fillFields, doDocScores, doMaxScore, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -54,7 +54,7 @@ public class SortRescorer extends Rescorer {
|
|||
|
||||
List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
|
||||
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, topN, true, true, true);
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, topN, true, true, true, true);
|
||||
|
||||
// Now merge sort docIDs from hits, with reader's leaves:
|
||||
int hitUpto = 0;
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.FieldValueHitQueue.Entry;
|
||||
|
@ -27,7 +28,7 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
* A {@link Collector} that sorts by {@link SortField} using
|
||||
* {@link FieldComparator}s.
|
||||
* <p>
|
||||
* See the {@link #create(org.apache.lucene.search.Sort, int, boolean, boolean, boolean)} method
|
||||
* See the {@link #create(org.apache.lucene.search.Sort, int, boolean, boolean, boolean, boolean)} method
|
||||
* for instantiating a TopFieldCollector.
|
||||
*
|
||||
* @lucene.experimental
|
||||
|
@ -67,20 +68,40 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
}
|
||||
}
|
||||
|
||||
static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
|
||||
final SortField[] fields1 = searchSort.getSort();
|
||||
final SortField[] fields2 = indexSort.getSort();
|
||||
// early termination is possible if fields1 is a prefix of fields2
|
||||
if (fields1.length > fields2.length) {
|
||||
return false;
|
||||
}
|
||||
return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length));
|
||||
}
|
||||
|
||||
static int estimateRemainingHits(int hitCount, int doc, int maxDoc) {
|
||||
double hitRatio = (double) hitCount / (doc + 1);
|
||||
int remainingDocs = maxDoc - doc - 1;
|
||||
int remainingHits = (int) (remainingDocs * hitRatio);
|
||||
return remainingHits;
|
||||
}
|
||||
|
||||
/*
|
||||
* Implements a TopFieldCollector over one SortField criteria, with tracking
|
||||
* document scores and maxScore.
|
||||
*/
|
||||
private static class SimpleFieldCollector extends TopFieldCollector {
|
||||
|
||||
final Sort sort;
|
||||
final FieldValueHitQueue<Entry> queue;
|
||||
final boolean trackDocScores;
|
||||
final boolean trackMaxScore;
|
||||
final boolean mayNeedScoresTwice;
|
||||
final boolean trackTotalHits;
|
||||
|
||||
public SimpleFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields,
|
||||
boolean trackDocScores, boolean trackMaxScore) {
|
||||
boolean trackDocScores, boolean trackMaxScore, boolean trackTotalHits) {
|
||||
super(queue, numHits, fillFields, sort.needsScores() || trackDocScores || trackMaxScore);
|
||||
this.sort = sort;
|
||||
this.queue = queue;
|
||||
if (trackMaxScore) {
|
||||
maxScore = Float.NEGATIVE_INFINITY; // otherwise we would keep NaN
|
||||
|
@ -91,6 +112,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
// we might call scorer.score() several times per doc so wrapping the scorer
|
||||
// to cache scores would help
|
||||
this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
|
||||
this.trackTotalHits = trackTotalHits;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -99,6 +121,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
|
||||
final LeafFieldComparator[] comparators = queue.getComparators(context);
|
||||
final int[] reverseMul = queue.getReverseMul();
|
||||
final boolean canEarlyTerminate = trackTotalHits == false &&
|
||||
trackMaxScore == false &&
|
||||
canEarlyTerminate(sort, context.reader().getMetaData().getSort());
|
||||
final int initialTotalHits = totalHits;
|
||||
|
||||
return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) {
|
||||
|
||||
|
@ -118,8 +144,17 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
// since docs are visited in doc Id order, if compare is 0, it means
|
||||
// this document is largest than anything else in the queue, and
|
||||
// therefore not competitive.
|
||||
if (canEarlyTerminate) {
|
||||
// scale totalHits linearly based on the number of docs
|
||||
// and terminate collection
|
||||
totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
|
||||
earlyTerminated = true;
|
||||
throw new CollectionTerminatedException();
|
||||
} else {
|
||||
// just move to the next doc
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (trackDocScores && !trackMaxScore) {
|
||||
score = scorer.score();
|
||||
|
@ -156,21 +191,25 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
*/
|
||||
private final static class PagingFieldCollector extends TopFieldCollector {
|
||||
|
||||
final Sort sort;
|
||||
int collectedHits;
|
||||
final FieldValueHitQueue<Entry> queue;
|
||||
final boolean trackDocScores;
|
||||
final boolean trackMaxScore;
|
||||
final FieldDoc after;
|
||||
final boolean mayNeedScoresTwice;
|
||||
final boolean trackTotalHits;
|
||||
|
||||
public PagingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits, boolean fillFields,
|
||||
boolean trackDocScores, boolean trackMaxScore) {
|
||||
boolean trackDocScores, boolean trackMaxScore, boolean trackTotalHits) {
|
||||
super(queue, numHits, fillFields, trackDocScores || trackMaxScore || sort.needsScores());
|
||||
this.sort = sort;
|
||||
this.queue = queue;
|
||||
this.trackDocScores = trackDocScores;
|
||||
this.trackMaxScore = trackMaxScore;
|
||||
this.after = after;
|
||||
this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
|
||||
this.trackTotalHits = trackTotalHits;
|
||||
|
||||
// Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
|
||||
maxScore = Float.NEGATIVE_INFINITY;
|
||||
|
@ -188,6 +227,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
final int afterDoc = after.doc - docBase;
|
||||
final boolean canEarlyTerminate = trackTotalHits == false &&
|
||||
trackMaxScore == false &&
|
||||
canEarlyTerminate(sort, context.reader().getMetaData().getSort());
|
||||
final int initialTotalHits = totalHits;
|
||||
return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) {
|
||||
|
||||
@Override
|
||||
|
@ -210,9 +253,18 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
final int cmp = reverseMul * comparator.compareBottom(doc);
|
||||
if (cmp <= 0) {
|
||||
// not competitive since documents are visited in doc id order
|
||||
if (canEarlyTerminate) {
|
||||
// scale totalHits linearly based on the number of docs
|
||||
// and terminate collection
|
||||
totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
|
||||
earlyTerminated = true;
|
||||
throw new CollectionTerminatedException();
|
||||
} else {
|
||||
// just move to the next doc
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final int topCmp = reverseMul * comparator.compareTop(doc);
|
||||
if (topCmp > 0 || (topCmp == 0 && doc <= afterDoc)) {
|
||||
|
@ -270,6 +322,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
FieldValueHitQueue.Entry bottom = null;
|
||||
boolean queueFull;
|
||||
int docBase;
|
||||
boolean earlyTerminated = false;
|
||||
final boolean needsScores;
|
||||
|
||||
// Declaring the constructor private prevents extending this class by anyone
|
||||
|
@ -318,12 +371,16 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
* true affects performance as it incurs the score computation on
|
||||
* each result. Also, setting this true automatically sets
|
||||
* <code>trackDocScores</code> to true as well.
|
||||
* @param trackTotalHits
|
||||
* specifies whether the total number of hits should be tracked. If
|
||||
* set to false, the value of {@link TopFieldDocs#totalHits} will be
|
||||
* approximated.
|
||||
* @return a {@link TopFieldCollector} instance which will sort the results by
|
||||
* the sort criteria.
|
||||
*/
|
||||
public static TopFieldCollector create(Sort sort, int numHits,
|
||||
boolean fillFields, boolean trackDocScores, boolean trackMaxScore) {
|
||||
return create(sort, numHits, null, fillFields, trackDocScores, trackMaxScore);
|
||||
boolean fillFields, boolean trackDocScores, boolean trackMaxScore, boolean trackTotalHits) {
|
||||
return create(sort, numHits, null, fillFields, trackDocScores, trackMaxScore, trackTotalHits);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -357,11 +414,15 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
* true affects performance as it incurs the score computation on
|
||||
* each result. Also, setting this true automatically sets
|
||||
* <code>trackDocScores</code> to true as well.
|
||||
* @param trackTotalHits
|
||||
* specifies whether the total number of hits should be tracked. If
|
||||
* set to false, the value of {@link TopFieldDocs#totalHits} will be
|
||||
* approximated.
|
||||
* @return a {@link TopFieldCollector} instance which will sort the results by
|
||||
* the sort criteria.
|
||||
*/
|
||||
public static TopFieldCollector create(Sort sort, int numHits, FieldDoc after,
|
||||
boolean fillFields, boolean trackDocScores, boolean trackMaxScore) {
|
||||
boolean fillFields, boolean trackDocScores, boolean trackMaxScore, boolean trackTotalHits) {
|
||||
|
||||
if (sort.fields.length == 0) {
|
||||
throw new IllegalArgumentException("Sort must contain at least one field");
|
||||
|
@ -374,7 +435,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
||||
|
||||
if (after == null) {
|
||||
return new SimpleFieldCollector(sort, queue, numHits, fillFields, trackDocScores, trackMaxScore);
|
||||
return new SimpleFieldCollector(sort, queue, numHits, fillFields, trackDocScores, trackMaxScore, trackTotalHits);
|
||||
} else {
|
||||
if (after.fields == null) {
|
||||
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");
|
||||
|
@ -384,7 +445,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
throw new IllegalArgumentException("after.fields has " + after.fields.length + " values but sort has " + sort.getSort().length);
|
||||
}
|
||||
|
||||
return new PagingFieldCollector(sort, queue, after, numHits, fillFields, trackDocScores, trackMaxScore);
|
||||
return new PagingFieldCollector(sort, queue, after, numHits, fillFields, trackDocScores, trackMaxScore, trackTotalHits);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -442,4 +503,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
public TopFieldDocs topDocs() {
|
||||
return (TopFieldDocs) super.topDocs();
|
||||
}
|
||||
|
||||
/** Return whether collection terminated early. */
|
||||
public boolean isEarlyTerminated() {
|
||||
return earlyTerminated;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -61,7 +61,6 @@ import org.apache.lucene.document.StoredField;
|
|||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
|
@ -2335,13 +2334,12 @@ public class TestIndexSorting extends LuceneTestCase {
|
|||
System.out.println("TEST: iter=" + iter + " numHits=" + numHits);
|
||||
}
|
||||
|
||||
TopFieldCollector c1 = TopFieldCollector.create(sort, numHits, true, true, true);
|
||||
TopFieldCollector c1 = TopFieldCollector.create(sort, numHits, true, true, true, true);
|
||||
s1.search(new MatchAllDocsQuery(), c1);
|
||||
TopDocs hits1 = c1.topDocs();
|
||||
|
||||
TopFieldCollector c2 = TopFieldCollector.create(sort, numHits, true, true, true);
|
||||
EarlyTerminatingSortingCollector c3 = new EarlyTerminatingSortingCollector(c2, sort, numHits);
|
||||
s2.search(new MatchAllDocsQuery(), c3);
|
||||
TopFieldCollector c2 = TopFieldCollector.create(sort, numHits, true, true, true, false);
|
||||
s2.search(new MatchAllDocsQuery(), c2);
|
||||
|
||||
TopDocs hits2 = c2.topDocs();
|
||||
|
||||
|
@ -2362,7 +2360,7 @@ public class TestIndexSorting extends LuceneTestCase {
|
|||
ScoreDoc hit1 = hits1.scoreDocs[i];
|
||||
ScoreDoc hit2 = hits2.scoreDocs[i];
|
||||
assertEquals(r1.document(hit1.doc).get("id"), r2.document(hit2.doc).get("id"));
|
||||
assertEquals(((FieldDoc) hit1).fields, ((FieldDoc) hit2).fields);
|
||||
assertArrayEquals(((FieldDoc) hit1).fields, ((FieldDoc) hit2).fields);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -386,10 +386,10 @@ public class TestBoolean2 extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// check diff (randomized) scorers (from AssertingSearcher) produce the same results
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, 1000, false, true, true);
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, 1000, false, true, true, false);
|
||||
searcher.search(q1, collector);
|
||||
ScoreDoc[] hits1 = collector.topDocs().scoreDocs;
|
||||
collector = TopFieldCollector.create(sort, 1000, false, true, true);
|
||||
collector = TopFieldCollector.create(sort, 1000, false, true, true, false);
|
||||
searcher.search(q1, collector);
|
||||
ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
|
||||
tot+=hits2.length;
|
||||
|
@ -402,10 +402,10 @@ public class TestBoolean2 extends LuceneTestCase {
|
|||
assertEquals(mulFactor*collector.totalHits + NUM_EXTRA_DOCS/2, hits4.totalHits);
|
||||
|
||||
// test diff (randomized) scorers produce the same results on bigSearcher as well
|
||||
collector = TopFieldCollector.create(sort, 1000 * mulFactor, false, true, true);
|
||||
collector = TopFieldCollector.create(sort, 1000 * mulFactor, false, true, true, false);
|
||||
bigSearcher.search(q1, collector);
|
||||
hits1 = collector.topDocs().scoreDocs;
|
||||
collector = TopFieldCollector.create(sort, 1000 * mulFactor, false, true, true);
|
||||
collector = TopFieldCollector.create(sort, 1000 * mulFactor, false, true, true, false);
|
||||
bigSearcher.search(q1, collector);
|
||||
hits2 = collector.topDocs().scoreDocs;
|
||||
CheckHits.checkEqual(q1, hits1, hits2);
|
||||
|
|
|
@ -86,7 +86,7 @@ public class TestElevationComparator extends LuceneTestCase {
|
|||
new SortField(null, SortField.Type.SCORE, reversed)
|
||||
);
|
||||
|
||||
TopDocsCollector<Entry> topCollector = TopFieldCollector.create(sort, 50, false, true, true);
|
||||
TopDocsCollector<Entry> topCollector = TopFieldCollector.create(sort, 50, false, true, true, true);
|
||||
searcher.search(newq.build(), topCollector);
|
||||
|
||||
TopDocs topDocs = topCollector.topDocs(0, 10);
|
||||
|
|
|
@ -281,7 +281,7 @@ public class TestTopDocsMerge extends LuceneTestCase {
|
|||
topHits = searcher.search(query, numHits);
|
||||
}
|
||||
} else {
|
||||
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
|
||||
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, true);
|
||||
searcher.search(query, c);
|
||||
if (useFrom) {
|
||||
from = TestUtil.nextInt(random(), 0, numHits - 1);
|
||||
|
@ -330,7 +330,7 @@ public class TestTopDocsMerge extends LuceneTestCase {
|
|||
if (sort == null) {
|
||||
subHits = subSearcher.search(w, numHits);
|
||||
} else {
|
||||
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
|
||||
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, true);
|
||||
subSearcher.search(w, c);
|
||||
subHits = c.topDocs(0, numHits);
|
||||
}
|
||||
|
|
|
@ -71,7 +71,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
for(int i = 0; i < sort.length; i++) {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, false,
|
||||
false, false);
|
||||
false, false, true);
|
||||
|
||||
is.search(q, tdc);
|
||||
|
||||
|
@ -90,7 +90,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
for(int i = 0; i < sort.length; i++) {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, false,
|
||||
false);
|
||||
false, true);
|
||||
|
||||
is.search(q, tdc);
|
||||
|
||||
|
@ -110,7 +110,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
for(int i = 0; i < sort.length; i++) {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true,
|
||||
false);
|
||||
false, true);
|
||||
|
||||
is.search(q, tdc);
|
||||
|
||||
|
@ -131,7 +131,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
for(int i = 0; i < sort.length; i++) {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true,
|
||||
false);
|
||||
false, true);
|
||||
|
||||
is.search(q, tdc);
|
||||
|
||||
|
@ -151,7 +151,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
for(int i = 0; i < sort.length; i++) {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true,
|
||||
true);
|
||||
true, true);
|
||||
|
||||
is.search(q, tdc);
|
||||
|
||||
|
@ -169,7 +169,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
// Two Sort criteria to instantiate the multi/single comparators.
|
||||
Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort() };
|
||||
for(int i = 0; i < sort.length; i++) {
|
||||
TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true, true);
|
||||
TopDocsCollector<Entry> tdc = TopFieldCollector.create(sort[i], 10, true, true, true, true);
|
||||
TopDocs td = tdc.topDocs();
|
||||
assertEquals(0, td.totalHits);
|
||||
assertTrue(Float.isNaN(td.getMaxScore()));
|
||||
|
@ -204,7 +204,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
for (Sort sort : new Sort[] {new Sort(SortField.FIELD_SCORE), new Sort(new SortField("f", SortField.Type.SCORE))}) {
|
||||
for (boolean doDocScores : new boolean[] {false, true}) {
|
||||
for (boolean doMaxScore : new boolean[] {false, true}) {
|
||||
final TopFieldCollector topCollector = TopFieldCollector.create(sort, TestUtil.nextInt(random(), 1, 2), true, doDocScores, doMaxScore);
|
||||
final TopFieldCollector topCollector = TopFieldCollector.create(sort, TestUtil.nextInt(random(), 1, 2), true, doDocScores, doMaxScore, true);
|
||||
final Collector assertingCollector = new Collector() {
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
|
|
|
@ -30,8 +30,8 @@ import org.apache.lucene.document.NumericDocValuesField;
|
|||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MockRandomMergePolicy;
|
||||
import org.apache.lucene.index.QueryTimeout;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SerialMergeScheduler;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -41,7 +41,7 @@ import org.apache.lucene.util.TestUtil;
|
|||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
|
||||
public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
||||
public class TestTopFieldCollectorEarlyTermination extends LuceneTestCase {
|
||||
|
||||
private int numDocs;
|
||||
private List<String> terms;
|
||||
|
@ -111,18 +111,36 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testEarlyTermination() throws IOException {
|
||||
doTestEarlyTermination(false);
|
||||
}
|
||||
|
||||
public void testEarlyTerminationWhenPaging() throws IOException {
|
||||
doTestEarlyTermination(true);
|
||||
}
|
||||
|
||||
private void doTestEarlyTermination(boolean paging) throws IOException {
|
||||
final int iters = atLeast(8);
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
createRandomIndex(false);
|
||||
int maxSegmentSize = 0;
|
||||
for (LeafReaderContext ctx : reader.leaves()) {
|
||||
maxSegmentSize = Math.max(ctx.reader().numDocs(), maxSegmentSize);
|
||||
}
|
||||
for (int j = 0; j < iters; ++j) {
|
||||
final IndexSearcher searcher = newSearcher(reader);
|
||||
final int numHits = TestUtil.nextInt(random(), 1, numDocs);
|
||||
final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG, false));
|
||||
FieldDoc after;
|
||||
if (paging) {
|
||||
TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
|
||||
after = (FieldDoc) td.scoreDocs[td.scoreDocs.length - 1];
|
||||
} else {
|
||||
after = null;
|
||||
}
|
||||
final boolean fillFields = random().nextBoolean();
|
||||
final boolean trackDocScores = random().nextBoolean();
|
||||
final boolean trackMaxScore = random().nextBoolean();
|
||||
final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
|
||||
final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, fillFields, trackDocScores, trackMaxScore);
|
||||
final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, after, fillFields, trackDocScores, trackMaxScore, true);
|
||||
final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, after, fillFields, trackDocScores, trackMaxScore, false);
|
||||
|
||||
final Query query;
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -131,115 +149,64 @@ public class TestEarlyTerminatingSortingCollector extends LuceneTestCase {
|
|||
query = new MatchAllDocsQuery();
|
||||
}
|
||||
searcher.search(query, collector1);
|
||||
searcher.search(query, new EarlyTerminatingSortingCollector(collector2, sort, numHits));
|
||||
assertTrue(collector1.getTotalHits() >= collector2.getTotalHits());
|
||||
assertTopDocsEquals(collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||
searcher.search(query, collector2);
|
||||
TopDocs td1 = collector1.topDocs();
|
||||
TopDocs td2 = collector2.topDocs();
|
||||
|
||||
assertFalse(collector1.isEarlyTerminated());
|
||||
if (trackMaxScore == false && paging == false && maxSegmentSize >= numHits && query instanceof MatchAllDocsQuery) {
|
||||
// Make sure that we sometimes early terminate
|
||||
assertTrue(collector2.isEarlyTerminated());
|
||||
}
|
||||
if (collector2.isEarlyTerminated()) {
|
||||
assertTrue(td2.totalHits >= td1.scoreDocs.length);
|
||||
assertTrue(td2.totalHits <= reader.maxDoc());
|
||||
} else {
|
||||
assertEquals(td2.totalHits, td1.totalHits);
|
||||
}
|
||||
assertTopDocsEquals(td1.scoreDocs, td2.scoreDocs);
|
||||
}
|
||||
closeIndex();
|
||||
}
|
||||
}
|
||||
|
||||
public void testCanEarlyTerminate() {
|
||||
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
assertTrue(TopFieldCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG))));
|
||||
|
||||
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
assertTrue(TopFieldCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
|
||||
assertTrue(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
assertTrue(TopFieldCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
|
||||
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
assertFalse(TopFieldCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG, true)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG, false))));
|
||||
|
||||
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
assertFalse(TopFieldCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG))));
|
||||
|
||||
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
assertFalse(TopFieldCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING))));
|
||||
|
||||
assertFalse(EarlyTerminatingSortingCollector.canEarlyTerminate(
|
||||
assertFalse(TopFieldCollector.canEarlyTerminate(
|
||||
new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)),
|
||||
new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING))));
|
||||
}
|
||||
|
||||
public void testEarlyTerminationDifferentSorter() throws IOException {
|
||||
createRandomIndex(true);
|
||||
|
||||
Sort sort = new Sort(new SortField("ndv2", SortField.Type.LONG, false));
|
||||
Collector c = new EarlyTerminatingSortingCollector(TopFieldCollector.create(sort, 10, true, true, true), sort, 10);
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
Exception e = expectThrows(IllegalStateException.class,
|
||||
() -> {
|
||||
searcher.search(new MatchAllDocsQuery(), c);
|
||||
});
|
||||
assertEquals("Cannot early terminate with sort order <long: \"ndv2\"> if segments are sorted with <long: \"ndv1\">", e.getMessage());
|
||||
closeIndex();
|
||||
}
|
||||
|
||||
private static void assertTopDocsEquals(ScoreDoc[] scoreDocs1, ScoreDoc[] scoreDocs2) {
|
||||
assertEquals(scoreDocs1.length, scoreDocs2.length);
|
||||
for (int i = 0; i < scoreDocs1.length; ++i) {
|
||||
final ScoreDoc scoreDoc1 = scoreDocs1[i];
|
||||
final ScoreDoc scoreDoc2 = scoreDocs2[i];
|
||||
assertEquals(scoreDoc1.doc, scoreDoc2.doc);
|
||||
assertEquals(scoreDoc1.score, scoreDoc2.score, 0.001f);
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestTerminatedEarlySimpleCollector extends SimpleCollector {
|
||||
private boolean collectedSomething;
|
||||
public boolean collectedSomething() {
|
||||
return collectedSomething;
|
||||
}
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
collectedSomething = true;
|
||||
}
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestEarlyTerminatingSortingcollectorQueryTimeout implements QueryTimeout {
|
||||
final private boolean shouldExit;
|
||||
public TestEarlyTerminatingSortingcollectorQueryTimeout(boolean shouldExit) {
|
||||
this.shouldExit = shouldExit;
|
||||
}
|
||||
public boolean shouldExit() {
|
||||
return shouldExit;
|
||||
}
|
||||
}
|
||||
|
||||
public void testTerminatedEarly() throws IOException {
|
||||
final int iters = atLeast(8);
|
||||
for (int i = 0; i < iters; ++i) {
|
||||
createRandomIndex(true);
|
||||
|
||||
final IndexSearcher searcher = new IndexSearcher(reader); // future TODO: use newSearcher(reader);
|
||||
final Query query = new MatchAllDocsQuery(); // search for everything/anything
|
||||
|
||||
final TestTerminatedEarlySimpleCollector collector1 = new TestTerminatedEarlySimpleCollector();
|
||||
searcher.search(query, collector1);
|
||||
|
||||
final TestTerminatedEarlySimpleCollector collector2 = new TestTerminatedEarlySimpleCollector();
|
||||
final EarlyTerminatingSortingCollector etsCollector = new EarlyTerminatingSortingCollector(collector2, sort, 1);
|
||||
searcher.search(query, etsCollector);
|
||||
|
||||
assertTrue("collector1="+collector1.collectedSomething()+" vs. collector2="+collector2.collectedSomething(), collector1.collectedSomething() == collector2.collectedSomething());
|
||||
|
||||
if (collector1.collectedSomething()) {
|
||||
// we collected something and since we modestly asked for just one document we should have terminated early
|
||||
assertTrue("should have terminated early (searcher.reader="+searcher.reader+")", etsCollector.terminatedEarly());
|
||||
}
|
||||
closeIndex();
|
||||
assertEquals(scoreDoc1.score, scoreDoc2.score, 0f);
|
||||
}
|
||||
}
|
||||
|
|
@ -240,7 +240,7 @@ public class DrillSideways {
|
|||
|
||||
@Override
|
||||
public TopFieldCollector newCollector() throws IOException {
|
||||
return TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
|
||||
return TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -259,7 +259,7 @@ public class DrillSideways {
|
|||
} else {
|
||||
|
||||
final TopFieldCollector hitCollector =
|
||||
TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
|
||||
TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore, true);
|
||||
DrillSidewaysResult r = search(query, hitCollector);
|
||||
return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
|
||||
}
|
||||
|
|
|
@ -232,7 +232,8 @@ public class FacetsCollector extends SimpleCollector implements Collector {
|
|||
(FieldDoc) after,
|
||||
fillFields,
|
||||
doDocScores,
|
||||
doMaxScore);
|
||||
doMaxScore,
|
||||
true); // TODO: can we disable exact hit counts
|
||||
} else {
|
||||
hitsCollector = TopScoreDocCollector.create(n, after);
|
||||
}
|
||||
|
|
|
@ -305,7 +305,7 @@ public class BlockGroupingCollector extends SimpleCollector {
|
|||
collector = TopScoreDocCollector.create(maxDocsPerGroup);
|
||||
} else {
|
||||
// Sort by fields
|
||||
collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores);
|
||||
collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, needsScores, needsScores, true); // TODO: disable exact counts?
|
||||
}
|
||||
|
||||
LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
|
||||
|
|
|
@ -70,7 +70,7 @@ public class TopGroupsCollector<T> extends SecondPassGroupingCollector<T> {
|
|||
this.needsScores = getScores || getMaxScores || withinGroupSort.needsScores();
|
||||
this.supplier = withinGroupSort == Sort.RELEVANCE ?
|
||||
() -> TopScoreDocCollector.create(maxDocsPerGroup) :
|
||||
() -> TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores);
|
||||
() -> TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true); // TODO: disable exact counts?
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -59,8 +59,6 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
|
@ -649,11 +647,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
//System.out.println("finalQuery=" + finalQuery);
|
||||
|
||||
// Sort by weight, descending:
|
||||
TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
|
||||
|
||||
// We sorted postings by weight during indexing, so we
|
||||
// only retrieve the first num hits now:
|
||||
Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
|
||||
TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false, false);
|
||||
List<LookupResult> results = null;
|
||||
SearcherManager mgr;
|
||||
IndexSearcher searcher;
|
||||
|
@ -663,7 +657,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
}
|
||||
try {
|
||||
//System.out.println("got searcher=" + searcher);
|
||||
searcher.search(finalQuery, c2);
|
||||
searcher.search(finalQuery, c);
|
||||
|
||||
TopFieldDocs hits = c.topDocs();
|
||||
|
||||
|
|
|
@ -537,7 +537,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
DocIdSetIterator iterator = new BitSetIterator(groupBits, 0); // cost is not useful here
|
||||
int group;
|
||||
while ((group = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
Collector collector = (sort == null) ? TopScoreDocCollector.create(limit) : TopFieldCollector.create(sort, limit, false, false, false);
|
||||
Collector collector = (sort == null) ? TopScoreDocCollector.create(limit) : TopFieldCollector.create(sort, limit, false, false, false, true);
|
||||
groups.put(group, collector);
|
||||
}
|
||||
|
||||
|
@ -629,7 +629,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
Iterator<LongCursor> iterator = groupSet.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
LongCursor cursor = iterator.next();
|
||||
Collector collector = (sort == null) ? TopScoreDocCollector.create(limit) : TopFieldCollector.create(sort, limit, false, false, false);
|
||||
Collector collector = (sort == null) ? TopScoreDocCollector.create(limit) : TopFieldCollector.create(sort, limit, false, false, false, true);
|
||||
groups.put(cursor.value, collector);
|
||||
}
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
package org.apache.solr.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
@ -28,7 +28,9 @@ import org.apache.lucene.search.FilterCollector;
|
|||
import org.apache.lucene.search.FilterLeafCollector;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TotalHitCountCollector;
|
||||
|
||||
/**
|
||||
|
@ -49,10 +51,11 @@ import org.apache.lucene.search.TotalHitCountCollector;
|
|||
* hit count} will be vastly underestimated since not all matching documents will have
|
||||
* been collected.
|
||||
*
|
||||
* @deprecated Use {@link TopFieldCollector} and set trackTotalHits to false.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
public class EarlyTerminatingSortingCollector extends FilterCollector {
|
||||
@Deprecated
|
||||
final class EarlyTerminatingSortingCollector extends FilterCollector {
|
||||
|
||||
/** Returns whether collection can be early-terminated if it sorts with the
|
||||
* provided {@link Sort} and if segments are merged with the provided
|
|
@ -881,7 +881,7 @@ public class Grouping {
|
|||
if (sort == null || sort.equals(Sort.RELEVANCE)) {
|
||||
return TopScoreDocCollector.create(groupDocsToCollect);
|
||||
} else {
|
||||
return TopFieldCollector.create(searcher.weightSort(sort), groupDocsToCollect, false, needScores, needScores);
|
||||
return TopFieldCollector.create(searcher.weightSort(sort), groupDocsToCollect, false, needScores, needScores, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ public class ReRankCollector extends TopDocsCollector {
|
|||
this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length));
|
||||
} else {
|
||||
sort = sort.rewrite(searcher);
|
||||
this.mainCollector = TopFieldCollector.create(sort, Math.max(this.reRankDocs, length), false, true, true);
|
||||
this.mainCollector = TopFieldCollector.create(sort, Math.max(this.reRankDocs, length), false, true, true, true);
|
||||
}
|
||||
this.searcher = searcher;
|
||||
this.reRankQueryRescorer = reRankQueryRescorer;
|
||||
|
|
|
@ -1518,7 +1518,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
// ... see comments in populateNextCursorMarkFromTopDocs for cache issues (SOLR-5595)
|
||||
final boolean fillFields = (null != cursor);
|
||||
final FieldDoc searchAfter = (null != cursor ? cursor.getSearchAfterFieldDoc() : null);
|
||||
return TopFieldCollector.create(weightedSort, len, searchAfter, fillFields, needScores, needScores);
|
||||
return TopFieldCollector.create(weightedSort, len, searchAfter, fillFields, needScores, needScores, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -127,7 +127,7 @@ public class QueryCommand implements Command<QueryCommandResult> {
|
|||
if (sort == null || sort.equals(Sort.RELEVANCE)) {
|
||||
collector = TopScoreDocCollector.create(docsToCollect);
|
||||
} else {
|
||||
collector = TopFieldCollector.create(sort, docsToCollect, true, needScores, needScores);
|
||||
collector = TopFieldCollector.create(sort, docsToCollect, true, needScores, needScores, true);
|
||||
}
|
||||
filterCollector = new FilterCollector(docSet, collector);
|
||||
return Arrays.asList((Collector) filterCollector);
|
||||
|
|
|
@ -285,7 +285,7 @@ public class TestSort extends SolrTestCaseJ4 {
|
|||
boolean trackScores = r.nextBoolean();
|
||||
boolean trackMaxScores = r.nextBoolean();
|
||||
boolean scoreInOrder = r.nextBoolean();
|
||||
final TopFieldCollector topCollector = TopFieldCollector.create(sort, top, true, trackScores, trackMaxScores);
|
||||
final TopFieldCollector topCollector = TopFieldCollector.create(sort, top, true, trackScores, trackMaxScores, true);
|
||||
|
||||
final List<MyDoc> collectedDocs = new ArrayList<>();
|
||||
// delegate and collect docs ourselves
|
||||
|
|
Loading…
Reference in New Issue